{
 "cells": [
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "WKUPkA0TJzbW",
    "outputId": "909eba98-db9c-482f-fb6b-b20b68bce069",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:44.172176Z",
     "start_time": "2025-03-19T06:31:44.164609Z"
    }
   },
   "source": [
     "# Environment setup: imports, version report, device selection, and RNG seeding.\n",
     "import matplotlib as mpl\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline\n",
     "import numpy as np\n",
     "import sklearn\n",
     "import pandas as pd\n",
     "import os\n",
     "import sys\n",
     "import time\n",
     "from tqdm.auto import tqdm\n",
     "import torch\n",
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
     "\n",
     "# Report interpreter and library versions for reproducibility.\n",
     "print(sys.version_info)\n",
     "for module in mpl, np, pd, sklearn, torch:\n",
     "    print(module.__name__, module.__version__)\n",
     "\n",
     "# Prefer the first CUDA GPU when available, otherwise fall back to CPU.\n",
     "device = torch.device(\"cuda:0\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
     "print(device)\n",
     "\n",
     "# Seed every RNG source used below so runs are repeatable.\n",
     "seed = 42\n",
     "torch.manual_seed(seed)\n",
     "torch.cuda.manual_seed_all(seed)\n",
     "np.random.seed(seed)\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sys.version_info(major=3, minor=12, micro=3, releaselevel='final', serial=0)\n",
      "matplotlib 3.10.0\n",
      "numpy 2.0.2\n",
      "pandas 2.2.3\n",
      "sklearn 1.6.1\n",
      "torch 2.6.0+cu126\n",
      "cuda:0\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pSNcMyqvJzbY"
   },
   "source": [
    "## 数据加载"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Dm1DrSCLJzbZ",
    "outputId": "ab7f8ef9-6119-4d2b-df23-7cc04f617802",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:48.310649Z",
     "start_time": "2025-03-19T06:31:48.306165Z"
    }
   },
   "source": [
     "import unicodedata\n",
     "import re\n",
     "from sklearn.model_selection import train_test_split\n",
     "\n",
     "# Spanish contains accented characters, so we fold Unicode down to plain ASCII;\n",
     "# this also keeps the eventual vocabulary smaller.\n",
     "def unicode_to_ascii(s):\n",
     "    \"\"\"Strip diacritics: NFD decomposes each character, then combining marks (category 'Mn') are dropped.\"\"\"\n",
     "    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')\n",
     "\n",
     "# Sanity-check on one sample sentence pair.\n",
     "# The u prefix marks a unicode string literal (a no-op on Python 3).\n",
     "en_sentence = u\"May I borrow this book?\"\n",
     "sp_sentence = u\"¿Puedo tomar prestado este libro?\"\n",
     "\n",
     "print(unicode_to_ascii(en_sentence))\n",
     "print(unicode_to_ascii(sp_sentence))\n",
     "\n",
     "\n"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "May I borrow this book?\n",
      "¿Puedo tomar prestado este libro?\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "code",
   "source": [
    "def preprocess_sentence(w):\n",
    "    #变为小写，去掉多余的空格，变成小写，id少一些\n",
    "    w = unicode_to_ascii(w.lower().strip())\n",
    "\n",
    "    # 在单词与跟在其后的标点符号之间插入一个空格\n",
    "    # eg: \"he is a boy.\" => \"he is a boy . \"\n",
    "    # Reference:- https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation\n",
    "    w = re.sub(r\"([?.!,¿])\", r\" \\1 \", w)\n",
    "    #因为可能有多余空格，替换为一个空格，所以处理一下\n",
    "    w = re.sub(r'[\" \"]+', \" \", w)\n",
    "\n",
    "    # 除了 (a-z, A-Z, \".\", \"?\", \"!\", \",\")，将所有字符替换为空格，你可以保留一些标点符号\n",
    "    w = re.sub(r\"[^a-zA-Z?.!,¿]+\", \" \", w)\n",
    "\n",
    "    w = w.rstrip().strip()\n",
    "\n",
    "    return w\n",
    "\n",
    "print(preprocess_sentence(en_sentence))\n",
    "print(preprocess_sentence(sp_sentence))\n",
    "print(preprocess_sentence(sp_sentence).encode('utf-8'))  #¿是占用两个字节的"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:49.821684Z",
     "start_time": "2025-03-19T06:31:49.817529Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "may i borrow this book ?\n",
      "¿ puedo tomar prestado este libro ?\n",
      "b'\\xc2\\xbf puedo tomar prestado este libro ?'\n"
     ]
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "YyJksrNmJzba"
   },
   "source": [
    "Dataset"
   ]
  },
  {
   "cell_type": "code",
   "source": [
     "# zip example: zip(*a) transposes a list of rows into tuples of columns.\n",
     "a = [[1,2],[4,5],[7,8]]\n",
     "zipped = list(zip(*a))\n",
     "print(zipped)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:54.647867Z",
     "start_time": "2025-03-19T06:31:54.644867Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(1, 4, 7), (2, 5, 8)]\n"
     ]
    }
   ],
   "execution_count": 22
  },
  {
   "cell_type": "code",
   "source": [
     "# Demo: draw 100 labels, ~90% \"train\" / ~10% \"test\" (same mechanism as the dataset split below).\n",
     "split_index1 = np.random.choice(a=[\"train\", \"test\"], replace=True, p=[0.9, 0.1], size=100)\n",
     "split_index1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:55.107965Z",
     "start_time": "2025-03-19T06:31:55.103476Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['train', 'test', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'test', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'test', 'test',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'test', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'test', 'train', 'test', 'train', 'train', 'test',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'test',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train', 'train', 'train', 'train', 'train', 'train',\n",
       "       'train', 'train'], dtype='<U5')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 23
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "-VnoIKhaJzba",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:56.946688Z",
     "start_time": "2025-03-19T06:31:55.827617Z"
    }
   },
   "source": [
     "from pathlib import Path\n",
     "from torch.utils.data import Dataset, DataLoader\n",
     "\n",
     "class LangPairDataset(Dataset):\n",
     "    \"\"\"Sentence-pair dataset (src=Spanish, trg=English) read from a tab-separated file,\n",
     "    with the preprocessed pairs cached as a .npy file to skip re-parsing on later runs.\n",
     "    \"\"\"\n",
     "    fpath = Path(r\"./data_spa_en/spa.txt\") # raw data file path\n",
     "    cache_path = Path(r\"./.cache/lang_pair.npy\") # preprocessed-cache file path\n",
     "    # 9:1 train/test split, drawn once at class-definition time so both modes share it.\n",
     "    # NOTE(review): size=118964 is hard-coded — confirm it matches the line count of spa.txt.\n",
     "    split_index = np.random.choice(a=[\"train\", \"test\"], replace=True, p=[0.9, 0.1], size=118964)\n",
     "    def __init__(self, mode=\"train\", cache=False):\n",
     "        # Rebuild when cache=True forces it, or when no cache file exists yet.\n",
     "        if cache or not self.cache_path.exists():\n",
     "            self.cache_path.parent.mkdir(parents=True, exist_ok=True) # create the cache dir; ignore if it exists\n",
     "            with open(self.fpath, \"r\", encoding=\"utf8\") as file:\n",
     "                lines = file.readlines()\n",
     "                lang_pair = [[preprocess_sentence(w) for w in l.split('\\t')]  for l in lines] # each line becomes [trg, src]\n",
     "                trg, src = zip(*lang_pair) # separate target language from source language\n",
     "                trg=np.array(trg) # to numpy array\n",
     "                src=np.array(src) # to numpy array\n",
     "                np.save(self.cache_path, {\"trg\": trg, \"src\": src})  # save as .npy so later runs skip preprocessing\n",
     "        else:\n",
     "            lang_pair = np.load(self.cache_path, allow_pickle=True).item() # allow_pickle=True is required to load the dict\n",
     "            trg = lang_pair[\"trg\"]\n",
     "            src = lang_pair[\"src\"]\n",
     "\n",
     "        self.trg = trg[self.split_index == mode] # this split's target sentences (English)\n",
     "        self.src = src[self.split_index == mode] # this split's source sentences (Spanish)\n",
     "\n",
     "    def __getitem__(self, index):\n",
     "        return self.src[index], self.trg[index]\n",
     "\n",
     "    def __len__(self):\n",
     "        return len(self.src)\n",
     "\n",
     "\n",
     "train_ds = LangPairDataset(\"train\")\n",
     "test_ds = LangPairDataset(\"test\")"
   ],
   "outputs": [],
   "execution_count": 24
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "knue-PUkJzbb",
    "outputId": "86c1d3c8-8bd2-4c7f-8576-7516c4767ec2",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:31:58.557878Z",
     "start_time": "2025-03-19T06:31:58.554755Z"
    }
   },
   "source": "print(\"source: {}\\ntarget: {}\".format(*train_ds[-1]))",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "source: si quieres sonar como un hablante nativo , debes estar dispuesto a practicar diciendo la misma frase una y otra vez de la misma manera en que un musico de banjo practica el mismo fraseo una y otra vez hasta que lo puedan tocar correctamente y en el tiempo esperado .\n",
      "target: if you want to sound like a native speaker , you must be willing to practice saying the same sentence over and over in the same way that banjo players practice the same phrase over and over until they can play it correctly and at the desired tempo .\n"
     ]
    }
   ],
   "execution_count": 25
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9mzBlPtGJzbb"
   },
   "source": [
    "### Tokenizer\n",
    "\n",
    "这里有两种处理方式，分别对应着 encoder 和 decoder 的 word embedding 是否共享，这里实现不共享的方案。"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "fMSIczSnJzbb",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:03.958075Z",
     "start_time": "2025-03-19T06:32:03.395624Z"
    }
   },
   "source": [
    "from collections import Counter\n",
    "\n",
    "def get_word_idx(ds, mode=\"src\", threshold=2):\n",
    "    #载入词表，看下词表长度，词表就像英语字典\n",
    "    word2idx = {\n",
    "        \"[PAD]\": 0,     # 填充 token\n",
    "        \"[BOS]\": 1,     # begin of sentence\n",
    "        \"[UNK]\": 2,     # 未知 token\n",
    "        \"[EOS]\": 3,     # end of sentence\n",
    "    }\n",
    "    idx2word = {value: key for key, value in word2idx.items()}\n",
    "    index = len(idx2word)\n",
    "    threshold = 1  # 出现次数低于此的token舍弃\n",
    "    #如果数据集有很多个G，那是用for循环的，不能' '.join\n",
    "    word_list = \" \".join([pair[0 if mode==\"src\" else 1] for pair in ds]).split()\n",
    "    # print(type(word_list))\n",
    "    counter = Counter(word_list) #统计词频,counter类似字典，key是单词，value是出现次数\n",
    "    print(\"word count:\", len(counter))\n",
    "\n",
    "    for token, count in counter.items():\n",
    "        if count >= threshold:#出现次数大于阈值的token加入词表\n",
    "            word2idx[token] = index #加入词表\n",
    "            idx2word[index] = token #加入反向词表\n",
    "            index += 1\n",
    "\n",
    "    return word2idx, idx2word\n",
    "\n",
    "src_word2idx, src_idx2word = get_word_idx(train_ds, \"src\") #源语言词表  西班牙语\n",
    "trg_word2idx, trg_idx2word = get_word_idx(train_ds, \"trg\") #目标语言词表 英语"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "word count: 23715\n",
      "word count: 12500\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "9_IjY_wIJzbb",
    "outputId": "f2bf8be3-ec47-48e2-b743-1d2dbd511adc",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:03.979254Z",
     "start_time": "2025-03-19T06:32:03.959072Z"
    }
   },
   "source": [
    "class Tokenizer:\n",
    "    def __init__(self, word2idx, idx2word, max_length=500, pad_idx=0, bos_idx=1, eos_idx=3, unk_idx=2):\n",
    "        self.word2idx = word2idx\n",
    "        self.idx2word = idx2word\n",
    "        self.max_length = max_length\n",
    "        self.pad_idx = pad_idx\n",
    "        self.bos_idx = bos_idx\n",
    "        self.eos_idx = eos_idx\n",
    "        self.unk_idx = unk_idx\n",
    "\n",
    "    def encode(self, text_list, padding_first=False, add_bos=True, add_eos=True, return_mask=False):\n",
    "        \"\"\"如果padding_first == True，则padding加载前面，否则加载后面\n",
    "        return_mask: 是否返回mask(掩码），mask用于指示哪些是padding的，哪些是真实的token\n",
    "        \"\"\"\n",
    "        max_length = min(self.max_length, add_eos + add_bos + max([len(text) for text in text_list]))\n",
    "        indices_list = []\n",
    "        for text in text_list:\n",
    "            indices = [self.word2idx.get(word, self.unk_idx) for word in text[:max_length - add_bos - add_eos]] #如果词表中没有这个词，就用unk_idx代替，indices是一个list,里面是每个词的index,也就是一个样本的index\n",
    "            if add_bos:\n",
    "                indices = [self.bos_idx] + indices\n",
    "            if add_eos:\n",
    "                indices = indices + [self.eos_idx]\n",
    "            if padding_first:#padding加载前面，超参可以调\n",
    "                indices = [self.pad_idx] * (max_length - len(indices)) + indices\n",
    "            else:#padding加载后面\n",
    "                indices = indices + [self.pad_idx] * (max_length - len(indices))\n",
    "            indices_list.append(indices)\n",
    "        input_ids = torch.tensor(indices_list) #转换为tensor\n",
    "        masks = (input_ids == self.pad_idx).to(dtype=torch.int64) #mask是一个和input_ids一样大小的tensor，0代表token，1代表padding，mask用于去除padding的影响\n",
    "        return input_ids if not return_mask else (input_ids, masks)\n",
    "\n",
    "\n",
    "    def decode(self, indices_list, remove_bos=True, remove_eos=True, remove_pad=True, split=False):\n",
    "        text_list = []\n",
    "        for indices in indices_list:\n",
    "            text = []\n",
    "            for index in indices:\n",
    "                word = self.idx2word.get(index, \"[UNK]\") #如果词表中没有这个词，就用unk_idx代替\n",
    "                if remove_bos and word == \"[BOS]\":\n",
    "                    continue\n",
    "                if remove_eos and word == \"[EOS]\":#如果到达eos，就结束\n",
    "                    break\n",
    "                if remove_pad and word == \"[PAD]\":#如果到达pad，就结束\n",
    "                    break\n",
    "                text.append(word) #单词添加到列表中\n",
    "            text_list.append(\" \".join(text) if not split else text) #把列表中的单词拼接，变为一个句子\n",
    "        return text_list\n",
    "\n",
    "#两个相对于1个toknizer的好处是embedding的参数量减少\n",
    "src_tokenizer = Tokenizer(word2idx=src_word2idx, idx2word=src_idx2word) #源语言tokenizer\n",
    "trg_tokenizer = Tokenizer(word2idx=trg_word2idx, idx2word=trg_idx2word) #目标语言tokenizer\n",
    "\n",
    "# trg_tokenizer.encode([[\"hello\"], [\"hello\", \"world\"]], add_bos=True, add_eos=False,return_mask=True)\n",
    "raw_text = [\"hello world\".split(), \"tokenize text datas with batch\".split(), \"this is a test\".split()]\n",
    "indices,mask = trg_tokenizer.encode(raw_text, padding_first=False, add_bos=True, add_eos=True,return_mask=True)\n",
    "decode_text = trg_tokenizer.decode(indices.tolist(), remove_bos=False, remove_eos=False, remove_pad=False)\n",
    "print(\"raw text\"+'-'*10)\n",
    "for raw in raw_text:\n",
    "    print(raw)\n",
    "print(\"mask\"+'-'*10)\n",
    "for m in mask:\n",
    "    print(m)\n",
    "print(\"indices\"+'-'*10)\n",
    "for index in indices:\n",
    "    print(index)\n",
    "print(\"decode text\"+'-'*10)\n",
    "for decode in decode_text:\n",
    "    print(decode)"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "raw text----------\n",
      "['hello', 'world']\n",
      "['tokenize', 'text', 'datas', 'with', 'batch']\n",
      "['this', 'is', 'a', 'test']\n",
      "mask----------\n",
      "tensor([0, 0, 0, 0, 1, 1, 1])\n",
      "tensor([0, 0, 0, 0, 0, 0, 0])\n",
      "tensor([0, 0, 0, 0, 0, 0, 1])\n",
      "indices----------\n",
      "tensor([   1,   16, 3218,    3,    0,    0,    0])\n",
      "tensor([   1,    2, 3878,    2,  552,    2,    3])\n",
      "tensor([   1,  117,  235,  103, 2896,    3,    0])\n",
      "decode text----------\n",
      "[BOS] hello world [EOS] [PAD] [PAD] [PAD]\n",
      "[BOS] [UNK] text [UNK] with [UNK] [EOS]\n",
      "[BOS] this is a test [EOS] [PAD]\n"
     ]
    }
   ],
   "execution_count": 27
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "S8BDjaa1Jzbc"
   },
   "source": [
    "### DataLoader"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "sPwlGzn8Jzbc",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:03.983679Z",
     "start_time": "2025-03-19T06:32:03.979254Z"
    }
   },
   "source": [
     "def collate_fct(batch):\n",
     "    \"\"\"Batch (src, trg) string pairs into model-ready index tensors on `device`.\"\"\"\n",
     "    src_words = [pair[0].split() for pair in batch]\n",
     "    trg_words = [pair[1].split() for pair in batch]\n",
     "\n",
     "    # Encoder input layout: [PAD] [BOS] src [EOS]  (left padding)\n",
     "    encoder_inputs, encoder_inputs_mask = src_tokenizer.encode(\n",
     "        src_words, padding_first=True, add_bos=True, add_eos=True, return_mask=True\n",
     "        )\n",
     "\n",
     "    # Decoder input layout: [BOS] trg [PAD]  (teacher forcing, shifted right)\n",
     "    decoder_inputs = trg_tokenizer.encode(\n",
     "        trg_words, padding_first=False, add_bos=True, add_eos=False, return_mask=False,\n",
     "        )\n",
     "\n",
     "    # Label layout: trg [EOS] [PAD]  (one step ahead of decoder_inputs)\n",
     "    decoder_labels, decoder_labels_mask = trg_tokenizer.encode(\n",
     "        trg_words, padding_first=False, add_bos=False, add_eos=True, return_mask=True\n",
     "        )\n",
     "\n",
     "    return {\n",
     "        \"encoder_inputs\": encoder_inputs.to(device=device),\n",
     "        \"encoder_inputs_mask\": encoder_inputs_mask.to(device=device),\n",
     "        \"decoder_inputs\": decoder_inputs.to(device=device),\n",
     "        \"decoder_labels\": decoder_labels.to(device=device),\n",
     "        \"decoder_labels_mask\": decoder_labels_mask.to(device=device),\n",
     "    } # a dict return keeps the many outputs self-describing\n"
   ],
   "outputs": [],
   "execution_count": 28
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "_JsuutYAJzbc",
    "outputId": "fd68e596-ed01-4bae-d731-c27f5a758a6c",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.108961Z",
     "start_time": "2025-03-19T06:32:03.984676Z"
    }
   },
   "source": [
     "# Smoke-test the pipeline: pull one batch and inspect every tensor it produces.\n",
     "sample_dl = DataLoader(train_ds, batch_size=2, shuffle=True, collate_fn=collate_fct)\n",
     "\n",
     "for batch in sample_dl:\n",
     "    for key, value in batch.items():\n",
     "        print(key)\n",
     "        print(value)\n",
     "    break"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "encoder_inputs\n",
      "tensor([[   0,    1,   55,   67, 1056,  306,   50,    5,    3],\n",
      "        [   1,   92, 5604,   50, 2622,  489, 3758,    5,    3]],\n",
      "       device='cuda:0')\n",
      "encoder_inputs_mask\n",
      "tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
      "        [0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0')\n",
      "decoder_inputs\n",
      "tensor([[   1,   17,   32,  516,   30, 1088, 1577,    5,    0],\n",
      "        [   1,   47, 2976,  689, 5400, 2238,  634,   29,    5]],\n",
      "       device='cuda:0')\n",
      "decoder_labels\n",
      "tensor([[  17,   32,  516,   30, 1088, 1577,    5,    3,    0],\n",
      "        [  47, 2976,  689, 5400, 2238,  634,   29,    5,    3]],\n",
      "       device='cuda:0')\n",
      "decoder_labels_mask\n",
      "tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1],\n",
      "        [0, 0, 0, 0, 0, 0, 0, 0, 0]], device='cuda:0')\n"
     ]
    }
   ],
   "execution_count": 29
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "K9JaKLR7Jzbc"
   },
   "source": [
    "## 定义模型"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "CGxzT605Jzbd",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.114163Z",
     "start_time": "2025-03-19T06:32:04.109958Z"
    }
   },
   "source": [
     "class Encoder(nn.Module):\n",
     "    \"\"\"GRU encoder: token ids -> per-step outputs plus final hidden state.\"\"\"\n",
     "    def __init__(\n",
     "        self,\n",
     "        vocab_size,\n",
     "        embedding_dim=256,\n",
     "        hidden_dim=1024,\n",
     "        num_layers=1,\n",
     "        ):\n",
     "        super().__init__()\n",
     "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
     "        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, batch_first=True)\n",
     "\n",
     "    def forward(self, encoder_inputs):\n",
     "        # encoder_inputs.shape = [batch size, sequence length]\n",
     "        embeds = self.embedding(encoder_inputs)\n",
     "        # embeds.shape = [batch size, sequence length, embedding_dim]\n",
     "        seq_output, hidden = self.gru(embeds)\n",
     "        # seq_output.shape = [batch size, sequence length, hidden_dim];\n",
     "        # hidden.shape = [num_layers, batch size, hidden_dim]\n",
     "        return seq_output, hidden"
   ],
   "outputs": [],
   "execution_count": 30
  },
  {
   "cell_type": "code",
   "source": [
     "# Shape check for the Encoder on random inputs.\n",
     "encoder = Encoder(vocab_size=100, embedding_dim=256, hidden_dim=1024, num_layers=4)\n",
     "encoder_inputs = torch.randint(0, 100, (2, 50))\n",
     "encoder_outputs, hidden = encoder(encoder_inputs)\n",
     "print(encoder_outputs.shape)\n",
     "print(hidden.shape)\n",
     "print(encoder_outputs[:,-1,:])\n",
     "print(hidden[-1,:,:]) # the last layer's hidden state"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.303876Z",
     "start_time": "2025-03-19T06:32:04.115158Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 50, 1024])\n",
      "torch.Size([4, 2, 1024])\n",
      "tensor([[-0.0636,  0.0092,  0.0103,  ..., -0.0153, -0.0004, -0.0341],\n",
      "        [-0.0175,  0.0283,  0.0105,  ...,  0.0142,  0.0282, -0.0413]],\n",
      "       grad_fn=<SliceBackward0>)\n",
      "tensor([[-0.0636,  0.0092,  0.0103,  ..., -0.0153, -0.0004, -0.0341],\n",
      "        [-0.0175,  0.0283,  0.0105,  ...,  0.0142,  0.0282, -0.0413]],\n",
      "       grad_fn=<SliceBackward0>)\n"
     ]
    }
   ],
   "execution_count": 31
  },
  {
   "cell_type": "code",
   "source": [
     "query1 = torch.randn(2, 1024)\n",
     "query1.unsqueeze(1).shape # insert a new axis: (2, 1024) -> (2, 1, 1024)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.308401Z",
     "start_time": "2025-03-19T06:32:04.303876Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([2, 1, 1024])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 32
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "pTQ6Mz1OJzbd",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.314492Z",
     "start_time": "2025-03-19T06:32:04.309395Z"
    }
   },
   "source": [
    "class BahdanauAttention(nn.Module):\n",
    "    def __init__(self, hidden_dim=1024):\n",
    "        super().__init__()\n",
    "        self.Wk = nn.Linear(hidden_dim, hidden_dim) #对keys做运算，encoder的输出EO\n",
    "        self.Wq = nn.Linear(hidden_dim, hidden_dim) #对query做运算，decoder的隐藏状态\n",
    "        self.V = nn.Linear(hidden_dim, 1)\n",
    "\n",
    "    def forward(self, query, keys, values, attn_mask=None):\n",
    "        \"\"\"\n",
    "        正向传播\n",
    "        :param query: hidden state，是decoder的隐藏状态，shape = [batch size, hidden_dim]\n",
    "        :param keys: EO  [batch size, sequence length, hidden_dim]\n",
    "        :param values: EO  [batch size, sequence length, hidden_dim]\n",
    "        :param attn_mask:[batch size, sequence length]\n",
    "        :return:\n",
    "        \"\"\"\n",
    "        # query.shape = [batch size, hidden_dim] -->通过unsqueeze(-2)增加维度 [batch size, 1, hidden_dim]\n",
    "        # keys.shape = [batch size, sequence length, hidden_dim]\n",
    "        # values.shape = [batch size, sequence length, hidden_dim]\n",
    "        scores = self.V(F.tanh(self.Wk(keys) + self.Wq(query.unsqueeze(-2)))) #unsqueeze(-2)增加维度\n",
    "        # score.shape = [batch size, sequence length, 1]\n",
    "        if attn_mask is not None: #这个mask是encoder_inputs_mask，用来mask掉padding的部分,让padding部分socres为0\n",
    "            # attn_mask is a matrix of 0/1 element,\n",
    "            # 1 means to mask logits while 0 means do nothing\n",
    "            # here we add -inf to the element while mask == 1\n",
    "            attn_mask = (attn_mask.unsqueeze(-1)) * -1e16 #在最后增加一个维度，[batch size, sequence length] --> [batch size, sequence length, 1]\n",
    "            scores += attn_mask\n",
    "        scores = F.softmax(scores, dim=-2) #对每一个词的score做softmax\n",
    "        # score.shape = [batch size, sequence length, 1]\n",
    "        context_vector = torch.mul(scores, values).sum(dim=-2) #对每一个词的score和对应的value做乘法，然后在seq_len维度上求和，得到context_vector\n",
    "        # context_vector.shape = [batch size, hidden_dim]\n",
    "        #socres用于最后的画图\n",
    "        return context_vector, scores\n"
   ],
   "outputs": [],
   "execution_count": 33
  },
  {
   "cell_type": "code",
   "source": [
     "# Tensor matrix multiplication: torch.mm of (2,3) @ (3,2) -> (2,2)\n",
     "a = torch.randn(2, 3)\n",
     "b = torch.randn(3, 2)\n",
     "c = torch.mm(a, b) # matrix product (the old comment \"add a dimension\" was wrong)\n",
     "print(c.shape)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.318722Z",
     "start_time": "2025-03-19T06:32:04.315490Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 2])\n"
     ]
    }
   ],
   "execution_count": 34
  },
  {
   "cell_type": "code",
   "source": [
     "# Shape check for BahdanauAttention on random tensors.\n",
     "attention = BahdanauAttention(hidden_dim=1024)\n",
     "query = torch.randn(2, 1024) # decoder hidden state\n",
     "keys = torch.randn(2, 50, 1024) # encoder outputs\n",
     "values = torch.randn(2, 50, 1024) # encoder outputs\n",
     "attn_mask = torch.randint(0, 2, (2, 50))\n",
     "context_vector, scores = attention(query, keys, values, attn_mask)\n",
     "print(context_vector.shape)\n",
     "print(scores.shape)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.341720Z",
     "start_time": "2025-03-19T06:32:04.320716Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 1024])\n",
      "torch.Size([2, 50, 1])\n"
     ]
    }
   ],
   "execution_count": 35
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "6W5FeRRrJzbd",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.348515Z",
     "start_time": "2025-03-19T06:32:04.342720Z"
    }
   },
   "source": [
     "class Decoder(nn.Module):\n",
     "    \"\"\"One-step GRU decoder with Bahdanau attention over the encoder outputs.\"\"\"\n",
     "    def __init__(\n",
     "        self,\n",
     "        vocab_size,\n",
     "        embedding_dim=256,\n",
     "        hidden_dim=1024,\n",
     "        num_layers=1,\n",
     "        ):\n",
     "        super(Decoder, self).__init__()\n",
     "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
     "        self.gru = nn.GRU(embedding_dim + hidden_dim, hidden_dim, num_layers=num_layers, batch_first=True)\n",
     "        self.fc = nn.Linear(hidden_dim, vocab_size) # final projection to vocabulary logits\n",
     "        self.dropout = nn.Dropout(0.6)\n",
     "        self.attention = BahdanauAttention(hidden_dim) # produces the context_vector\n",
     "\n",
     "    def forward(self, decoder_input, hidden, encoder_outputs, attn_mask=None):\n",
     "        # attn_mask is the encoder_inputs_mask (1 marks encoder padding).\n",
     "        # decoder_input.shape = [batch size, 1]\n",
     "        assert len(decoder_input.shape) == 2 and decoder_input.shape[-1] == 1, f\"decoder_input.shape = {decoder_input.shape}\"\n",
     "        # hidden.shape = [batch size, hidden_dim]: the previous decoder hidden state\n",
     "        # (the encoder's final hidden state on the first step).\n",
     "        assert len(hidden.shape) == 2, f\"hidden.shape = {hidden.shape}\"\n",
     "        # encoder_outputs.shape = [batch size, sequence length, hidden_dim]\n",
     "        assert len(encoder_outputs.shape) == 3, f\"encoder_outputs.shape = {encoder_outputs.shape}\"\n",
     "\n",
     "        context_vector, attention_score = self.attention(\n",
     "            query=hidden, keys=encoder_outputs, values=encoder_outputs, attn_mask=attn_mask)\n",
     "        # context_vector.shape = [batch size, hidden_dim]\n",
     "        embeds = self.embedding(decoder_input)\n",
     "        # embeds.shape = [batch size, 1, embedding_dim]\n",
     "        embeds = torch.cat([context_vector.unsqueeze(-2), embeds], dim=-1)\n",
     "        # embeds.shape = [batch size, 1, embedding_dim + hidden_dim]\n",
     "        # NOTE(review): the incoming `hidden` is used only as the attention query and\n",
     "        # is NOT fed into self.gru (the GRU starts from its default zero state each\n",
     "        # step) — this mirrors the TF NMT-with-attention design, but confirm intentional.\n",
     "        seq_output, hidden = self.gru(embeds)\n",
     "        # seq_output.shape = [batch size, 1, hidden_dim]\n",
     "        logits = self.fc(self.dropout(seq_output))\n",
     "        # logits.shape = [batch size, 1, vocab size]; attention_score = [batch size, sequence length, 1]\n",
     "        return logits, hidden, attention_score\n",
     "\n"
   ],
   "outputs": [],
   "execution_count": 36
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "FG-Pid9cJzbd",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.355873Z",
     "start_time": "2025-03-19T06:32:04.349512Z"
    }
   },
   "source": [
    "class Sequence2Sequence(nn.Module):\n",
    "    \"\"\"Encoder-decoder sequence-to-sequence model.\n",
    "\n",
    "    Wires together the project's ``Encoder`` and ``Decoder`` modules.\n",
    "    ``forward`` decodes with teacher forcing (training); ``infer`` decodes\n",
    "    greedily one token at a time (prediction).\n",
    "    \"\"\"\n",
    "    def __init__(\n",
    "        self,\n",
    "        src_vocab_size,  # size of the source (input) vocabulary\n",
    "        trg_vocab_size,  # size of the target (output) vocabulary\n",
    "        encoder_embedding_dim=256,\n",
    "        encoder_hidden_dim=1024,\n",
    "        encoder_num_layers=1,\n",
    "        decoder_embedding_dim=256,\n",
    "        decoder_hidden_dim=1024,\n",
    "        decoder_num_layers=1,\n",
    "        bos_idx=1,  # index of the beginning-of-sequence token\n",
    "        eos_idx=3,  # index of the end-of-sequence token\n",
    "        max_length=512,  # hard cap on generated length in infer()\n",
    "        ):\n",
    "        super(Sequence2Sequence, self).__init__()\n",
    "        self.bos_idx = bos_idx\n",
    "        self.eos_idx = eos_idx\n",
    "        self.max_length = max_length\n",
    "        self.encoder = Encoder(\n",
    "            src_vocab_size,\n",
    "            embedding_dim=encoder_embedding_dim,\n",
    "            hidden_dim=encoder_hidden_dim,\n",
    "            num_layers=encoder_num_layers,\n",
    "            )\n",
    "        self.decoder = Decoder(\n",
    "            trg_vocab_size,\n",
    "            embedding_dim=decoder_embedding_dim,\n",
    "            hidden_dim=decoder_hidden_dim,\n",
    "            num_layers=decoder_num_layers,\n",
    "            )\n",
    "\n",
    "    def forward(self, *, encoder_inputs, decoder_inputs, attn_mask=None):\n",
    "        \"\"\"Teacher-forced decoding over the whole target sequence.\n",
    "\n",
    "        Returns (logits, attention_scores), each concatenated over the\n",
    "        decoder time steps.\n",
    "        \"\"\"\n",
    "        # encoding\n",
    "        encoder_outputs, hidden = self.encoder(encoder_inputs)\n",
    "        # decoding with teacher forcing\n",
    "        bs, seq_len = decoder_inputs.shape\n",
    "        logits_list = []\n",
    "        scores_list = []\n",
    "        for i in range(seq_len):  # one decoder call per target position (sequential)\n",
    "            # Each iteration predicts one time step: logits are collected\n",
    "            # for the loss, attention scores for visualization.\n",
    "            logits, hidden, score = self.decoder(\n",
    "                decoder_inputs[:, i:i+1],\n",
    "                hidden[-1],  # hidden state of the last layer\n",
    "                encoder_outputs,\n",
    "                attn_mask=attn_mask\n",
    "                )\n",
    "            logits_list.append(logits)  # predicted logits, used for the loss\n",
    "            scores_list.append(score)  # attention scores, used for plotting\n",
    "\n",
    "        return torch.cat(logits_list, dim=-2), torch.cat(scores_list, dim=-1)\n",
    "\n",
    "    @torch.no_grad()  # inference only: no gradients needed\n",
    "    def infer(self, encoder_input, attn_mask=None):\n",
    "        \"\"\"Greedy autoregressive decoding for a single source sequence.\n",
    "\n",
    "        encoder_input.shape = [1, sequence length]\n",
    "        Returns (list of predicted token ids, concatenated attention scores).\n",
    "        \"\"\"\n",
    "        # encoding\n",
    "        encoder_outputs, hidden = self.encoder(encoder_input)\n",
    "\n",
    "        # decoding: start from the BOS token, tensor of shape [1, 1]\n",
    "        # NOTE(review): this tensor is created on CPU; confirm it matches\n",
    "        # the model's device before calling infer() on a CUDA model.\n",
    "        decoder_input = torch.Tensor([self.bos_idx]).reshape(1, 1).to(dtype=torch.int64)\n",
    "        decoder_pred = None\n",
    "        pred_list = []  # generated token ids\n",
    "        score_list = []\n",
    "        # Generate iteratively from bos_idx until eos_idx is produced or\n",
    "        # max_length steps have been taken.\n",
    "        for _ in range(self.max_length):\n",
    "            logits, hidden, score = self.decoder(\n",
    "                decoder_input,\n",
    "                hidden[-1],\n",
    "                encoder_outputs,\n",
    "                attn_mask=attn_mask\n",
    "                )\n",
    "            # using greedy search: take the arg-max token\n",
    "            decoder_pred = logits.argmax(dim=-1)\n",
    "            decoder_input = decoder_pred\n",
    "            pred_list.append(decoder_pred.reshape(-1).item())  # (1, 1) tensor -> Python scalar\n",
    "            score_list.append(score)  # attention scores, used for plotting\n",
    "\n",
    "            # stop at eos token\n",
    "            if decoder_pred == self.eos_idx:\n",
    "                break\n",
    "\n",
    "        # return\n",
    "        return pred_list, torch.cat(score_list, dim=-1)\n",
    "\n"
   ],
   "outputs": [],
   "execution_count": 37
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zE-vNp-xJzbe"
   },
   "source": [
    "## 训练"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "o55JWSvhJzbe"
   },
   "source": [
    "### 损失函数"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "c_Mmw5GAJzbe",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.360134Z",
     "start_time": "2025-03-19T06:32:04.356871Z"
    }
   },
   "source": [
    "def cross_entropy_with_padding(logits, labels, padding_mask=None):\n",
    "    # logits.shape = [batch size, sequence length, num of classes]\n",
    "    # labels.shape = [batch size, sequence length]\n",
    "    # padding_mask.shape = [batch size, sequence length]\n",
    "    bs, seq_len, nc = logits.shape\n",
    "    loss = F.cross_entropy(logits.reshape(bs * seq_len, nc), labels.reshape(-1), reduce=False) #reduce=False表示不对batch求平均\n",
    "    if padding_mask is None:#如果没有padding_mask，就直接求平均\n",
    "        loss = loss.mean()\n",
    "    else:\n",
    "        # 如果提供了 padding_mask，则将填充部分的损失去除后计算有效损失的均值。首先，通过将 padding_mask reshape 成一维张量，并取 1 减去得到填充掩码。这样填充部分的掩码值变为 1，非填充部分变为 0。将损失张量与填充掩码相乘，这样填充部分的损失就会变为 0。然后，计算非填充部分的损失和（sum）以及非填充部分的掩码数量（sum）作为有效损失的均值计算。(因为上面我们设计的mask的token是0，所以这里是1-padding_mask)\n",
    "        padding_mask = 1 - padding_mask.reshape(-1) #将padding_mask reshape成一维张量，mask部分为0，非mask部分为1\n",
    "        loss = torch.mul(loss, padding_mask).sum() / padding_mask.sum()\n",
    "\n",
    "    return loss\n"
   ],
   "outputs": [],
   "execution_count": 38
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ITY9VUiiJzbe"
   },
   "source": [
    "### Callback"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "qez1fjOPJzbe",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.366553Z",
     "start_time": "2025-03-19T06:32:04.361131Z"
    }
   },
   "source": [
    "from torch.utils.tensorboard import SummaryWriter\n",
    "\n",
    "\n",
    "class TensorBoardCallback:\n",
    "    def __init__(self, log_dir, flush_secs=10):\n",
    "        \"\"\"\n",
    "        Args:\n",
    "            log_dir (str): dir to write log.\n",
    "            flush_secs (int, optional): write to dsk each flush_secs seconds. Defaults to 10.\n",
    "        \"\"\"\n",
    "        self.writer = SummaryWriter(log_dir=log_dir, flush_secs=flush_secs)\n",
    "\n",
    "    def draw_model(self, model, input_shape):\n",
    "        self.writer.add_graph(model, input_to_model=torch.randn(input_shape))\n",
    "\n",
    "    def add_loss_scalars(self, step, loss, val_loss):\n",
    "        self.writer.add_scalars(\n",
    "            main_tag=\"training/loss\",\n",
    "            tag_scalar_dict={\"loss\": loss, \"val_loss\": val_loss},\n",
    "            global_step=step,\n",
    "            )\n",
    "\n",
    "    def add_acc_scalars(self, step, acc, val_acc):\n",
    "        self.writer.add_scalars(\n",
    "            main_tag=\"training/accuracy\",\n",
    "            tag_scalar_dict={\"accuracy\": acc, \"val_accuracy\": val_acc},\n",
    "            global_step=step,\n",
    "        )\n",
    "\n",
    "    def add_lr_scalars(self, step, learning_rate):\n",
    "        self.writer.add_scalars(\n",
    "            main_tag=\"training/learning_rate\",\n",
    "            tag_scalar_dict={\"learning_rate\": learning_rate},\n",
    "            global_step=step,\n",
    "\n",
    "        )\n",
    "\n",
    "    def __call__(self, step, **kwargs):\n",
    "        # add loss\n",
    "        loss = kwargs.pop(\"loss\", None)\n",
    "        val_loss = kwargs.pop(\"val_loss\", None)\n",
    "        if loss is not None and val_loss is not None:\n",
    "            self.add_loss_scalars(step, loss, val_loss)\n",
    "        # add acc\n",
    "        acc = kwargs.pop(\"acc\", None)\n",
    "        val_acc = kwargs.pop(\"val_acc\", None)\n",
    "        if acc is not None and val_acc is not None:\n",
    "            self.add_acc_scalars(step, acc, val_acc)\n",
    "        # add lr\n",
    "        learning_rate = kwargs.pop(\"lr\", None)\n",
    "        if learning_rate is not None:\n",
    "            self.add_lr_scalars(step, learning_rate)\n"
   ],
   "outputs": [],
   "execution_count": 39
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "wXtxS8ukJzbe",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.372475Z",
     "start_time": "2025-03-19T06:32:04.367550Z"
    }
   },
   "source": [
    "class SaveCheckpointsCallback:\n",
    "    def __init__(self, save_dir, save_step=5000, save_best_only=True):\n",
    "        \"\"\"\n",
    "        Save checkpoints each save_epoch epoch.\n",
    "        We save checkpoint by epoch in this implementation.\n",
    "        Usually, training scripts with pytorch evaluating model and save checkpoint by step.\n",
    "\n",
    "        Args:\n",
    "            save_dir (str): dir to save checkpoint\n",
    "            save_epoch (int, optional): the frequency to save checkpoint. Defaults to 1.\n",
    "            save_best_only (bool, optional): If True, only save the best model or save each model at every epoch.\n",
    "        \"\"\"\n",
    "        self.save_dir = save_dir\n",
    "        self.save_step = save_step\n",
    "        self.save_best_only = save_best_only\n",
    "        self.best_metrics = - np.inf\n",
    "\n",
    "        # mkdir\n",
    "        if not os.path.exists(self.save_dir):\n",
    "            os.mkdir(self.save_dir)\n",
    "\n",
    "    def __call__(self, step, state_dict, metric=None):\n",
    "        if step % self.save_step > 0:\n",
    "            return\n",
    "\n",
    "        if self.save_best_only:\n",
    "            assert metric is not None\n",
    "            if metric >= self.best_metrics:\n",
    "                # save checkpoints\n",
    "                torch.save(state_dict, os.path.join(self.save_dir, \"best.ckpt\"))\n",
    "                # update best metrics\n",
    "                self.best_metrics = metric\n",
    "        else:\n",
    "            torch.save(state_dict, os.path.join(self.save_dir, f\"{step}.ckpt\"))\n",
    "\n"
   ],
   "outputs": [],
   "execution_count": 40
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "lfzfWswRJzbe",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.376944Z",
     "start_time": "2025-03-19T06:32:04.373469Z"
    }
   },
   "source": [
    "class EarlyStopCallback:\n",
    "    def __init__(self, patience=5, min_delta=0.01):\n",
    "        \"\"\"\n",
    "\n",
    "        Args:\n",
    "            patience (int, optional): Number of epochs with no improvement after which training will be stopped.. Defaults to 5.\n",
    "            min_delta (float, optional): Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute\n",
    "                change of less than min_delta, will count as no improvement. Defaults to 0.01.\n",
    "        \"\"\"\n",
    "        self.patience = patience\n",
    "        self.min_delta = min_delta\n",
    "        self.best_metric = - np.inf\n",
    "        self.counter = 0\n",
    "\n",
    "    def __call__(self, metric):\n",
    "        if metric >= self.best_metric + self.min_delta:\n",
    "            # update best metric\n",
    "            self.best_metric = metric\n",
    "            # reset counter\n",
    "            self.counter = 0\n",
    "        else:\n",
    "            self.counter += 1\n",
    "\n",
    "    @property\n",
    "    def early_stop(self):\n",
    "        return self.counter >= self.patience\n"
   ],
   "outputs": [],
   "execution_count": 41
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "F2f3S6z7Jzbf"
   },
   "source": [
    "### training & evaluating"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "id": "IOlJp26YJzbf",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:32:04.381455Z",
     "start_time": "2025-03-19T06:32:04.377941Z"
    }
   },
   "source": [
    "@torch.no_grad()\n",
    "def evaluating(model, dataloader, loss_fct):\n",
    "    loss_list = []\n",
    "    for batch in dataloader:\n",
    "        encoder_inputs = batch[\"encoder_inputs\"]\n",
    "        encoder_inputs_mask = batch[\"encoder_inputs_mask\"]\n",
    "        decoder_inputs = batch[\"decoder_inputs\"]\n",
    "        decoder_labels = batch[\"decoder_labels\"]\n",
    "        decoder_labels_mask = batch[\"decoder_labels_mask\"]\n",
    "\n",
    "        # 前向计算\n",
    "        logits, _ = model(\n",
    "            encoder_inputs=encoder_inputs,\n",
    "            decoder_inputs=decoder_inputs,\n",
    "            attn_mask=encoder_inputs_mask\n",
    "            ) #model就是seq2seq模型\n",
    "        loss = loss_fct(logits, decoder_labels, padding_mask=decoder_labels_mask)         # 验证集损失\n",
    "        loss_list.append(loss.cpu().item())\n",
    "\n",
    "    return np.mean(loss_list)\n"
   ],
   "outputs": [],
   "execution_count": 42
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 101,
     "referenced_widgets": [
      "267a9f8d838c4649b208914938b1bcff",
      "a9552c62dcb4441fbca47f89e939759d",
      "7dc472c2f73f4443a0e8437074e67476",
      "c46cefacefb54be7ad60ca93855de3ca",
      "9ebbd2d91c9849baaa85cff5b2b048e1",
      "63c6ee4650cb4a938e9f325113e259d1",
      "95d9e57c587f43e6ba5f656f2b2e5c71",
      "863e7ad581894502979884fe0e9e412e",
      "7d21bf5788794ed2b0b4dc4a98a5d2e9",
      "296304e90e43440094ba467cafb20896",
      "c3d8fddae7354c278c8e88291dbcee8d"
     ]
    },
    "id": "brzx2uFHJzbf",
    "outputId": "6c482ef7-5b4c-41e8-954f-6e9d0a034d1b",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:32.988952Z",
     "start_time": "2025-03-19T06:32:04.382448Z"
    }
   },
   "source": [
    "# training loop\n",
    "def training(\n",
    "    model,\n",
    "    train_loader,\n",
    "    val_loader,\n",
    "    epoch,\n",
    "    loss_fct,\n",
    "    optimizer,\n",
    "    tensorboard_callback=None,\n",
    "    save_ckpt_callback=None,\n",
    "    early_stop_callback=None,\n",
    "    eval_step=500,\n",
    "    ):\n",
    "    \"\"\"Train ``model`` for ``epoch`` epochs with periodic evaluation.\n",
    "\n",
    "    Every ``eval_step`` optimizer steps the model is evaluated on\n",
    "    ``val_loader`` and the optional callbacks (TensorBoard logging,\n",
    "    checkpointing, early stopping) are invoked.\n",
    "\n",
    "    Returns:\n",
    "        dict with \"train\" and \"val\" lists of {\"loss\", \"step\"} records.\n",
    "    \"\"\"\n",
    "    record_dict = {\n",
    "        \"train\": [],\n",
    "        \"val\": []\n",
    "    }\n",
    "\n",
    "    global_step = 1\n",
    "    # Pre-initialize so the progress-bar postfix below cannot raise a\n",
    "    # NameError when an epoch finishes before the first evaluation\n",
    "    # (eval_step > len(train_loader)) or when train_loader is empty.\n",
    "    loss, val_loss = None, None\n",
    "    model.train()  # switch to training mode\n",
    "    with tqdm(total=epoch * len(train_loader)) as pbar:\n",
    "        for epoch_id in range(epoch):\n",
    "            # training\n",
    "            for batch in train_loader:\n",
    "                encoder_inputs = batch[\"encoder_inputs\"]\n",
    "                encoder_inputs_mask = batch[\"encoder_inputs_mask\"]\n",
    "                decoder_inputs = batch[\"decoder_inputs\"]\n",
    "                decoder_labels = batch[\"decoder_labels\"]\n",
    "                decoder_labels_mask = batch[\"decoder_labels_mask\"]\n",
    "\n",
    "                # reset gradients\n",
    "                optimizer.zero_grad()\n",
    "\n",
    "                # forward pass (teacher forcing happens inside the model)\n",
    "                logits, _ = model(\n",
    "                    encoder_inputs=encoder_inputs,\n",
    "                    decoder_inputs=decoder_inputs,\n",
    "                    attn_mask=encoder_inputs_mask\n",
    "                    )\n",
    "                loss = loss_fct(logits, decoder_labels, padding_mask=decoder_labels_mask)\n",
    "\n",
    "                # backward pass\n",
    "                loss.backward()\n",
    "\n",
    "                # optimizer update (including any learning-rate schedule)\n",
    "                optimizer.step()\n",
    "\n",
    "                loss = loss.cpu().item()\n",
    "                # record\n",
    "                record_dict[\"train\"].append({\n",
    "                    \"loss\": loss, \"step\": global_step\n",
    "                })\n",
    "\n",
    "                # evaluating\n",
    "                if global_step % eval_step == 0:\n",
    "                    model.eval()  # switch to evaluation mode\n",
    "                    val_loss = evaluating(model, val_loader, loss_fct)\n",
    "                    record_dict[\"val\"].append({\n",
    "                        \"loss\": val_loss, \"step\": global_step\n",
    "                    })\n",
    "                    model.train()  # back to training mode\n",
    "\n",
    "                    # 1. TensorBoard visualization\n",
    "                    if tensorboard_callback is not None:\n",
    "                        tensorboard_callback(\n",
    "                            global_step,\n",
    "                            loss=loss, val_loss=val_loss,\n",
    "                            lr=optimizer.param_groups[0][\"lr\"],\n",
    "                            )\n",
    "\n",
    "                    # 2. save model checkpoint (metric is -val_loss: higher is better)\n",
    "                    if save_ckpt_callback is not None:\n",
    "                        save_ckpt_callback(global_step, model.state_dict(), metric=-val_loss)\n",
    "\n",
    "                    # 3. Early Stop\n",
    "                    if early_stop_callback is not None:\n",
    "                        early_stop_callback(-val_loss)\n",
    "                        if early_stop_callback.early_stop:\n",
    "                            print(f\"Early stop at epoch {epoch_id} / global_step {global_step}\")\n",
    "                            return record_dict\n",
    "\n",
    "                # update step\n",
    "                global_step += 1\n",
    "                pbar.update(1)\n",
    "            pbar.set_postfix({\"epoch\": epoch_id, \"loss\": loss, \"val_loss\": val_loss})  # refresh the progress bar\n",
    "\n",
    "    return record_dict\n",
    "\n",
    "\n",
    "epoch = 20\n",
    "batch_size = 64\n",
    "\n",
    "model = Sequence2Sequence(src_vocab_size=len(src_word2idx), trg_vocab_size=len(trg_word2idx))\n",
    "train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fct)\n",
    "test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fct)\n",
    "\n",
    "# 1. loss function: cross entropy that ignores padded positions\n",
    "loss_fct = cross_entropy_with_padding\n",
    "# 2. optimizer: Adam\n",
    "# Optimizers specified in the torch.optim package\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# 1. TensorBoard visualization\n",
    "if not os.path.exists(\"runs\"):\n",
    "    os.mkdir(\"runs\")\n",
    "exp_name = \"translate-seq2seq\"\n",
    "tensorboard_callback = TensorBoardCallback(f\"runs/{exp_name}\")\n",
    "# tensorboard_callback.draw_model(model, [1, MAX_LENGTH])\n",
    "# 2. save the best checkpoint (evaluated every 200 steps below)\n",
    "if not os.path.exists(\"checkpoints\"):\n",
    "    os.makedirs(\"checkpoints\")\n",
    "save_ckpt_callback = SaveCheckpointsCallback(\n",
    "    f\"checkpoints/{exp_name}\", save_step=200, save_best_only=True)\n",
    "# 3. early stopping after 5 evaluations without improvement\n",
    "early_stop_callback = EarlyStopCallback(patience=5)\n",
    "\n",
    "model = model.to(device)\n",
    "\n",
    "record = training(\n",
    "    model,\n",
    "    train_dl,\n",
    "    test_dl,\n",
    "    epoch,\n",
    "    loss_fct,\n",
    "    optimizer,\n",
    "    tensorboard_callback=tensorboard_callback,\n",
    "    save_ckpt_callback=save_ckpt_callback,\n",
    "    early_stop_callback=early_stop_callback,\n",
    "    eval_step=200\n",
    "    )"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "  0%|          | 0/33520 [00:00<?, ?it/s]"
      ],
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "4b3bf9de11144179a8e7e4c1855e9d3f"
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\13351\\AppData\\Roaming\\Python\\Python312\\site-packages\\torch\\nn\\_reduction.py:51: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.\n",
      "  warnings.warn(warning.format(ret))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Early stop at epoch 3 / global_step 5600\n"
     ]
    }
   ],
   "execution_count": 43
  },
  {
   "cell_type": "code",
   "source": [
    "# total number of model parameters\n",
    "sum(p.numel() for _, p in model.named_parameters())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:32.993951Z",
     "start_time": "2025-03-19T06:39:32.989949Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35212249"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 44
  },
  {
   "cell_type": "code",
   "source": [
    "# sanity check: presumably total tqdm steps (33520) / epochs (20)\n",
    "# = batches per epoch — matches len(train_dl) implied above; TODO confirm\n",
    "33520/20"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:33.016023Z",
     "start_time": "2025-03-19T06:39:32.993951Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1676.0"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 45
  },
  {
   "cell_type": "code",
   "source": [
    "# presumably: dataset size (118964) * 0.9 train split / batch size (64)\n",
    "# — close to the 1676 batches per epoch computed above; TODO confirm\n",
    "118964*0.9/64"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:33.019869Z",
     "start_time": "2025-03-19T06:39:33.016023Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1672.93125"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 46
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 430
    },
    "id": "mAKeaApNJzbf",
    "outputId": "2933f0b5-29b1-47bc-a6dd-63350da513ca",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:33.097107Z",
     "start_time": "2025-03-19T06:39:33.020864Z"
    }
   },
   "source": [
    "# Training vs. validation loss curves. plt.legend() is required for the\n",
    "# \"train\"/\"val\" labels passed to plot() to actually be displayed.\n",
    "plt.plot([i[\"step\"] for i in record[\"train\"]], [i[\"loss\"] for i in record[\"train\"]], label=\"train\")\n",
    "plt.plot([i[\"step\"] for i in record[\"val\"]], [i[\"loss\"] for i in record[\"val\"]], label=\"val\")\n",
    "plt.xlabel(\"step\")\n",
    "plt.ylabel(\"loss\")\n",
    "plt.legend()\n",
    "plt.grid()\n",
    "plt.show()"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ],
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhYAAAGdCAYAAABO2DpVAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAUpFJREFUeJzt3Qd4U+X+B/Bvkm5oyx6FAmXIHiKoyJCNinqdVwW9XESvAwHFi/Oq4ALlqqjXi+sK+r+CG/UiG9my9957ldlBS0dy/s/vzWiSpiNN2tOefD/Pk+ckJ2l6+ibN+eadJk3TNBAREREFgTkYT0JEREQkGCyIiIgoaBgsiIiIKGgYLIiIiChoGCyIiIgoaBgsiIiIKGgYLIiIiChoGCyIiIgoaMJQxmw2G06cOIHY2FiYTKay/vVERERUAjKfZlpaGhISEmA2m8tPsJBQkZiYWNa/loiIiILg6NGjqF+/fvkJFlJT4TywuLi4oD1vTk4O5s2bh/79+yM8PDxozxtKWIaBYfkFjmUYGJZf4FiGBUtNTVUVA87zeLkJFs7mDwkVwQ4WMTEx6jn5ZigZlmFgWH6BYxkGhuUXOJZh0YrqxsDOm0RERBQ0DBZEREQUNAwWREREFDQMFkRERBQ0DBZEREQUNAwWREREFDQMFkRERBQ0DBZEREQUNAwWREREFDQMFkRERBQ0DBZEREQUNAwWREREFDSGCRaTFu7DjwfNOJV6We9DISIiClllvrppafl+/XEkp5lx4VIOEqvrfTREREShyTA1Fs5VXG2apvehEBERhSzDBAuzI1kwVxAREenHQMHCvrUyWRAREenGMMHC5KixYFMIERGRfgxXY8FcQUREpB8DBQvWWBAREenNQMHCvmWwICIi0o/x+ljY9D4SIiKi0GWYYGFxBAuOCiEiItKP4SbIIiIiIv0YJlg4scKCiIhIP4YJFs4KCw1MFkRERHoxTLBwtYUwVxAREenGgDUWREREpBfjBAvXzJuMFkRERHoxXrDQ+0CIiIhCmHGChaMxhBUWRERE+jFOsGCNBRERke6MEywcW/axICIi0o9hgoUrWRAREZFujBMsnFhhQUREpBvjdd7U+0CIiIhCmAHnsdD7SIiIiEKXcYKFY8u1QoiIiPRjnGDhqLJgjQUREZF+jBMsHFvmCiIiIv0YJ1hwrRAiIiLdGSZYEBERkf4MEyzYx4KIiEh/xgkWeh8AERERGSdYOLHCgoiISD+GCRbsvElERKQ/4wQLx5axgoiISD/GCRbsvElERKQ74wQLx5a5goiISD+GCRZ5yYLRgoiISC+GCRZcNp2IiEh/xgkWXDadiIhId8YJFo4tl00nIiLSj3GCBafeJCIi0p1hgoUTm0KIiIj0Y5hgwc6bRERE+jNMsHB2smCNBRERkX4MEyzyulgwWRAREenFOMGCNRZERES6M06wYB8LIiIi3RknWLDGgoiISHfGCRaOLSfIIiIi0o9xggVnyCIiItKdYYKFE5tCiIiI9GO8YKH3ARAREYUwwwQLV0sIqyyIiIh0Y5xg4dgyVhAREVWQYGG1WvHSSy8hKSkJ0dHRaNKkCV577TVo5aCWwNl5sxwcChERUcgK8+fBb731FiZPnowvv/wSrVu3xrp16zB06FDEx8dj5MiR0BNrLIiIiCpYsPjjjz/wpz/9CQMHDlS3GzVqhOnTp2PNmjUoPxNkMVoQERFViGBx3XXX4dNPP8WePXtwxRVXYPPmzVi+fDnefffdAn8mKytLXZxSU1PVNicnR12CxRkopLkmmM8bSpzlxvIrGZZf4FiGgWH5BY5lWLDilolJ8+Mrvs1mwwsvvIC3334bFotFncTfeOMNPP/88wX+zNixYzFu3Lh8+6dNm4aYmBgEy5d7zNhwzozbG1nRsy5rLYiIiIIpIyMDgwYNQkpKCuLi4oJTY/Hdd9/h66+/VqFA+lhs2rQJTz75JBISEjBkyBCfPyOhY/To
0R41FomJiejfv3+hB+aveWmbgHPJaH5Fc9zUvXHQnjfU0uj8+fPRr18/hIeH6304FQ7LL3Asw8Cw/ALHMiyYs8WhKH4FizFjxuC5557Dvffeq263bdsWhw8fxvjx4wsMFpGRkeriTV6wYL5oJpN9gIvZYuGbIUDBfm1CDcsvcCzDwLD8AscyzK+45WH2txrEbPb8EWkSkSYSvbHzJhERkf78qrG45ZZbVJ+KBg0aqKaQjRs3qo6bDz74IPRmcgw4ZawgIiKqIMHiww8/VBNkPf7440hOTlZ9Kx555BG8/PLLKD81FnofCRERUejyK1jExsZi0qRJ6lLe5E2QxWRBRESkF+OsFcIaCyIiIt0ZJlg4kwWDBRERkX4MEyycTSFERESkH+MECw43JSIi0p1xggXrLIiIiHRnmGDhxPoKIiIi/RgmWHBUCBERkf6MEywcW+YKIiIi/RgnWLDzJhERke4MEyycdRaMFURERPoxXI0FkwUREZF+jBMsHFuuFUJERKQf4wQLjgohIiLSnXGCBftYEBER6c44wYITbxIREenOMMHCiU0hRERE+jFMsGDnTSIiIv0ZJljk9d7U+0CIiIhCl2GCBaexICIi0p9xggWHmxIREenOOMHCsWUfCyIiIv0YJ1g4qixYY0FERKQf4wQLx5a5goiISD/GCRZcNp2IiEh3hgkWREREpD8GCyIiIgoawwQLdt4kIiLSn3GChWPLXEFERKQf4wQLdt4kIiLSnXGChaPOgrGCiIhIP8YJFpzSm4iISHfGCRaOLXMFERGRfgwTLPKSBaMFERGRXgzXx4KIiIj0Y5hg4cT6CiIiIv0YJliw8yYREZH+jBMsHFuNdRZERES6MU6wYI0FERGR7owTLDhBFhERke4MEyycbSGssSAiItKPYYJF3mBTJgsiIiK9GCdYcNl0IiIi3RknWDi2zBVERET6MU6w4MSbREREujNMsHBiUwgREZF+DBMsOEEWERGR/owTLNh5k4iISHeGCRZOzBVERET6MV7nTVZZEBER6cZwwYK5goiISD/GCRZcK4SIiEh3xgkWrLEgIiLSnXGChWPL4aZERET6MU6w4NSbREREujNMsHBiUwgREZF+jBcs9D4AIiKiEGbAeSx0PhAiIqIQZpxg4diy8yYREZF+jBMsuFYIERGR7owTLBxb5goiIiL9GHCCLEYLIiIivRgnWDi2jBVERET6MUywcFZZsMKCiIhIP4YJFpx3k4iISH+GCRZERESkP8MEC3beJCIi0p9xgoWjMYSxgoiIqAIFi+PHj+P+++9H9erVER0djbZt22LdunUoPzUWeh8JERFR6Arz58EXLlxA165d0atXL8yePRs1a9bE3r17UbVqVeiNU3oTERFVsGDx1ltvITExEVOmTHHtS0pKQnnAGgsiIqIKFix+/fVXDBgwAHfffTeWLFmCevXq4fHHH8fDDz9c4M9kZWWpi1Nqaqra5uTkqEuwWK02tbXZbEF93lDiLDeWX8mw/ALHMgwMyy9wLMOCFbdMTJofwyiioqLUdvTo0SpcrF27FqNGjcLHH3+MIUOG+PyZsWPHYty4cfn2T5s2DTExMQiWVckmTN9vQcsqNjza0h4yiIiIKDgyMjIwaNAgpKSkIC4uLjjBIiIiAp06dcIff/zh2jdy5EgVMFauXFnsGgtpTjl79myhB+av79YewYu/7kL3ptXwxZBOQXveUEuj8+fPR79+/RAeHq734VQ4LL/AsQwDw/ILHMuwYHL+rlGjRpHBwq+mkLp166JVq1Ye+1q2bIkff/yxwJ+JjIxUF2/yggXzRbNYLK7l0/lmCEywX5tQw/ILHMswMCy/wLEM8ytuefg13FRGhOzevdtj3549e9CwYUPojZ03iYiI9OdXsHjqqaewatUqvPnmm9i3b5/qJ/Hpp59i+PDhKC8TZB06l6H3oRAREYUsv4JF586dMWPGDEyfPh1t2rTBa6+9hkmTJmHw4MHQ274z6Wp79EKm
3odCREQUsvzqYyFuvvlmdSlvtp9I0/sQiIiIQp5h1gqxsXMFERGR7gwTLHJtDBZERER6M0ywsDFYEBER6c6QNRaXc6y6HgsREVGoMkywyMzOCxPJqXkzfRIREVHZMUywGNG7ieu62TB/FRERUcVimFNwo+p5C5pZzI5pOImIiKhMGSZYxEfnzWHOkadERET6MEywqBtvX9JdWDlChIiISBeGCRbuOKcFERGRPgwZLJbsTtb7EIiIiEKSIYPFrG2n9D4EIiKikGTIYLHm4Hm9D4GIiCgkGTJY3HFlPb0PgYiIKCQZKlh0rG5T25Z14/Q+FCIiopBkqGCx4Zz9z3lj1k5sOHJB78MhIiIKOYYKFu5GTNuo9yEQERGFHEMFixvq5y1ElmuzN4sQERFR2TFUsIi05F3PtXKSLCIiorJmqGAR7vbX5FhZY0FERFTWDBUswtwWNc1hjQUREVGZM1SwOJWZlywyc/L6WxAREVHZMFSwOJruVmVBREREZc5QwaK9Y4IsIiIi0oehgkXbauxXQUREpCdDBQuLV0vITxuO6XUoREREIcmwo0LE6O8263UoREREIclQwaJSuN5HQEREFNoMFSyIiIhIXwwWREREFDSGCxbv3d1W70MgIiIKWYYLFmYTJ8kiIiLSi+GCxa7TaR63l+09o9uxEBERhRrDBYt+LWt53H7gP2t0OxYiIqJQY7hgUTsuSu9DICIiClmGCxa1YiP1PgQiIqKQZbhgQURERPoxZLD44dEueh8CERFRSDJksGhWK1bvQyAiIgpJhgwWYd7LnBIREVGZMGSwCLd4/lk5Vptux0JERBRKDBksIsI8/6xnf9ii27EQERGFEkMGC28/bTyOO/69Asmpl/U+FCIiIkMLiWAhNhy5iDdm7dT7MIiIiAwtZIKFSMnM0fsQiIiIDC2kgoXVpul9CERERIYWUsFCY64gIiIqVSEVLFhjQUREVLoMGyxmjeyeb5+NVRZERESlyrDBolVCXL59zBVERESly7DBwhcrkwUREVGpCqlgsf7wBczYeEzvwyAiIjKskAoW4qlvN+t9CERERIYVcsGCiIiISg+DBREREQWNoYPFgtE99D4EIiKikGLoYNG0VixeuKmF3odBREQUMgwdLITFbPg/kYiIqNwIybPuTe8vQ3LqZb0Pg4iIyHAMHyxMPvbtOJmKiXN363A0RERExmb4YFGQrFyb3odARERkOCEbLMy+qjKIiIgoICEbLH7edELvQyAiIjKckA0WREREFHyGDxY9rqip9yEQERGFDMMHi6a1KmPpmF4+79t45AJ+XH8MKZk5ZX5cRERERhSGENCgeozP/bf/+w+17d6sBv5v2DVlfFRERETGY/gai+JYtves3odARERkCAEFiwkTJsBkMuHJJ59EefefIZ30PgQiIiLDK3GwWLt2LT755BO0a9cOFUGflrX1PgQiIiLDK1GwSE9Px+DBg/HZZ5+hatWqMIJXftmm9yEQERGFZufN4cOHY+DAgejbty9ef/31Qh+blZWlLk6pqalqm5OToy7B4nyukj7nlysPo3KkBXdcmYAG1Xx39jS6QMsw1LH8AscyDAzLL3Asw4IVt0xMmqZp8MM333yDN954QzWFREVFoWfPnujQoQMmTZrk8/Fjx47FuHHj8u2fNm0aYmLK9gQ+amXROapymIY3OlvL5HiIiIgqioyMDAwaNAgpKSmIi4sLTrA4evQoOnXqhPnz57v6VhQVLHzVWCQmJuLs2bOFHlhJkpQcV79+/RAeHu7zMXd8vApbj9trTAqz97X+CEXFKUMqGMsvcCzDwLD8AscyLJicv2vUqFFksPCrKWT9+vVITk5Gx44dXfusViuWLl2Kf/3rXypAWCwWj5+JjIxUF2/ygpXGi1bY8yZWiylWsAj1N1NpvTahguUXOJZhYFh+gWMZ5lfc8vArWPTp0wdbt2712Dd06FC0aNECzz77bL5QUd6EmTltBxERUWnyK1jExsaiTZs2
HvsqVaqE6tWr59tfHjWqUUnvQyAiIjK0kPoK/9j1TfCXLg2LfJyf/VmJiIgoWMFi8eLFBXbcLG+iIyx49U9F16wkPT8Lo7/dxMXJiIiI/BRSNRb++GnjcUycu0vvwyAiIqpQQjJYRIQV788+diGz1I+FiIjISEIyWFyTVK1Yj7OxqwUREZFfQjJYvHl722I9zsZkQURE5JeQDBYyUVaTmkUPPd16PKVMjoeIiMgoQjJYiHBL0X96dq7N4/a24ykYP2snDp69VIpHRkREVHGFbLB458/ti3yMTdNwOceKZXvPqLktbv5wOT5ZegC9/rkYU1YcLJPjJCIiMvyy6UbQOiEererGYcfJgtcOycq1ocVLc9T1ypGeRTXufzswtGtSqR8nERFRRRKyNRbiqX5XFPux6Vm5pXosRERERhDSwaJfq9oB/fywqWs5/TcREZGbkA4WIsxsKvHPLtyVjH/9vi+ox0NERFSRhXywMJU8VyjvzN8TrEMhIiKq8EI+WISZ84ogAiVbdCwlIwcr95/DgTPpQTwyIiKiiifkg8WUoZ1RO8aEp8O+w6yI5xGFLL+f4/Fp63HfZ6vQ+50lpXKMREREFUXIB4trG1fHqjHX4S7LUjQ1n8DTYd/7/Rwr9p1zXc/Mtgb5CImIiCqOkA8WwhRdBc/nDFPXh1lm40rT3hI/17Q1R4J4ZERERBULg4XDaksn/GjtDrNJw9vhnyIS2SV6nv9tPhH0YyMiIqooGCwclj3bCw0GvY8zWjyamY9jZNhPJXqeTUcvBv3YiIiIKgoGC4calSPRuWUT/CPnQXX7EctMtDEdCMpzy4iRrhN+x6LdyUF5PiIiovKKwcLLXFtn/M96LcJMNkwM/xThCGwq71yrTY0YOX4xE0OnrEVGdi7Gz96JF2ZsRerlHBw5lxG0YyciItIbg4UPr+T8Fee0WLQ0H8Fjll/9/vlnftistrIy6vUTF3vc1+/dpfhkyQFMW30E7cbOQ4+Jizj/BRERGQaDhQ/nEYexOUPU9SfCZqC5yb+RHt+tO6bWENlw+IKqqXDnfVusPJA3XJWIiKgiY7AoQL1u9yOryQ2IMFkxMfwTWODf/BRv/LYTC3YWr08F1zEjIiKjYLAowDWNqyPytveRosWgnfkgHrb85tfPf778IL5YcbBEv1tqO3adSkWO1VainyciItILg0UBWtSNBWLr4NWcv6jbT4X9iCam42Xyu79aeRg3TFqGEdM2lsnvIyIiChYGCy+rX+iD+U/1QN34aHX7R1t3LLa2R6QpR02cZUbp1yJ8utQ+zHXO9lOl/ruIiIiCicHCS+24KDSrHeu2x4Tncx5CmhaNq8x78VfLXB2PjoiIqHxjsCjC4z2b4CSq483cQer2mLBv0cB0Oqi/IzvXpvpVOJlMQX16IiKiMsNgUYS/92+OmSO64fVX/4kV1taINmXjrbDPYApik8irM3dg6NS1fgeL5NTL2HDkQtCOg4iIKFAMFkUwm01oUy8eFosZz+U+hAwtEl0sOzDYsjCov2fx7jMF3vf8T1vwly/WwGaz12pI7cZnSw/g6jcX4o5//4GNDBdERFROMFj44ahWG2/l3quuPxc2HfVQcBgIhAmeVRbT1xzF0j1nsOV4iro9f8dpvDFrp+v+NQfPl8pxEBER+YvBwk9fWfthre0KVDZdxvjwz6X+IGjPveNEar597n0vnv9pq9oeLmR9kcxsK06m5J/dk4goVMjnpiyV4KzlpbLFYOEnDWY8k/MILmvh6GHZirstS4L23Dd9sAxZuZ4zfFrd/jF2nkzFL5uOQ/MKM84+GbI2ScuX56DL+N/xw/pjQTsuIqKK5LNlB9D7nSV4+ddteh9KSGKw8MMjPRqr7UGtLt7NvUtdfynsv6iF4PVxuHHSMhw5n1cjYfWa73vUN5vy/cylLHsY2XUqzbXv9d92BO2YiIgqkolzd6vtf1f5t84TBQeDhR9G9W2GR663h4vPrQOxydYEcaYMvBszNWhN
IgfOXiqwxqIg7y/ci+/XHfXYZ+aYVSIKUVx/SV8MFn6IiQjDvZ0bqOs2mDEm5xFkaxZ0s63FreY/SuV3/rrpRLEeN+aHLR63zRU0V+w5nYYPFu5FRnau3odCRBUUc4W+GCz8ZHOLwnu1+vgg9w51/Z3KX6MG7KM2guk5R4dNd2/O2uXzse4dPWVsiS+Ldidj8uL9Xo8tHbJEvMy14Y/+7y3Fu/P34J15e0rtuIjI2Mri840KxmAR4Bv2Y+st2GtOQnj2RYwNlyaR8uFsepbP/UOnrMVbc3Zh6d6zQf196w6dx6mUvBAhNQ5dJ/yu5tooSc/szUcvBvX4iCh0uH/isPaz7DFY+CnM7FlkuQjD25EjAXMYbrasxlNhP1SIiriTF4M3JFVm/7zr45W4dnzepGGnU/OCTW4JgkX5L0EiKq/cv/8dPc/h92WNwcJPDavH4J5Oia4RIuJAWGOg71h1fVTYT3gz7HNY4DlstCxczsk/zXjq5RzM2HhMDUV1F4zh3duOp+DXzSew1scEXe4NMd7DY4uDVZlEFAw51tJfkZo8hXndpiKYTCa8dVc7df0Tx/LmSTUqAdeNAMJjYJ35NAaFLUJNUypG5DyBy4gss2O777NV+fa1GztPbZ/6djMOTRjo2j9pwR60T4xH64T4Ev++mz9crrb9WtXOd1+gg1IYK4goGEpSY0qBYY1FAH54tAtu65CAN+9oa9/ReRgez3kSWVo4+lnW478R4xGPdJRHyWlZGPiBPRgURCbkumHSUjWFeGH2Jxf+N5ak8oEVFkQUDKyxKHsMFgHo1KgaJt17JWrFRrn2zbV1xv3ZzyNFi0En8x78EDEOCQhuR8niOn8pO6Cff/zrDWrSrYe/WqduT19zBAM/WIbT3iM9TIWvd1KiYOH/jxAR5SPD16lsMViUgrVaCwyPeAMntWpoZj6OHyPH4gqT5wRWZaHja/MD+vm0yzket2Wtku0nUtWokqNus4P6avVwbwopSR8LVlkQUTAsC/IIOCoag0UpOWhphDuyxmGPrR7qms6rmourTXkrkuphvNuKqE7P/LAZmwoc2um7o4R0BL1j8h8e/U4K63hZkiZOxgoiooqJwaKUyKjUk6iOu7NfwRpbczX19/9FTMAA8xrdjsnZ2dTdd+uO4baPVnjsy7XacMRrBdVDXlONn0nLG07qHiue/2kLkp6f5dFcwhEeREShg8GilHRrWkNtU1AZD2Q/j3nWqxBpysHk8PdxvyWwJorS9PkuM1qOXYAeExd5TLLV85+LXdePXfAcF+5eYTF9jb3JR+a1cCpJrGAWISKqmBgsgmzZM70w6Z4OePnm1qgUYUGDajHIQgQey3kS03J7w2zS8Hr4FIwO+67cVfjvS07H1gtFvyW2HEvxa8EzzRbY1OlERFRxcB6LIEusFqMuYsPL/WAxmdD0xdmwwoIXcofhlFYNo8N/wMiwn1EbF9U+uU9v/168D3tP5y277o9LRUyZez4jG/Ex4UjJzEFkmBlR4UX/vcwVREQVE2ssSlFkmAVhFvciNuED6x14LuchWDUT7glbjE/C30UUfK/rUZbenrMbMzYWbyVVb0VNmTts6lo1A2j7cfPQ+fUFJTxCIqLikS8w7h6cula3YwlFDBZlrFfzmvjG2huP5IzGZS0cfS0bMS3iDVRByWoLKoIDZy+p6b9FWlYu5mw7id7/XOza52R1Gz7CCgsiKinvGs/fdyXnW9aASg+DRRm6pX0Cpgy9Wl1fYLsKE+u8DVtkFXQ078OPEWNRD2dgVO4TZj363w0qbDz29XrXvoU7T6PJC7PyjSQ5fO4Spqw4iGmrj5TxERNRRVWiuXMoaBgsysDAdnXV9uHuSWo7sk8ztZjZ8L/cD/OwuUBcfTQxn8RPka+gp3kTjMjXOiaZ2Xm9Ood9aZ/d00lm/Jw4dxeun7gY4/63Ay/M2Kr6aMi3jj/2n0V2rk3VfIz9dTvSs3Jdw2Q5tJWIfH0M8KOh7DBYlIF/3Xcl
toztj3b1q6jbo/tdgSVjeqFapQigVgtg2DzssiWitukipka8jS/DJ2BIUy71+9Gi/fmaSp75YQsGfbYar/+2Q9V8TP3jENq8MhcpGTno9MYCjPzGmMGMiIqvXf38iyuyFqPsMFiUAZmZMi4qvOAHxNfDXdmv4NPcgcjWLLjesgXjjv8Nr4ZNQVWkwqiyc634Zs0RfL4s/8RdvsgIG1mmXXy18rDHfT9vOo6LGTn4n+N+d6zFIAotdeLz1m9y4iKnZYfBopyoW6sW3swdjH7ZE4EWNwOaFX8Jm48lkaMxzPIbwlH4kM6KKPVyLp77aSte/614U53f/m/PGUILCw/OdU6kuURGoyzZY9z+K0Tkydd3idUHzulxKCGJwaKccM4xdVirA9z7NTDkf0Cdtmoq8JfCv8a8iDHob5YhU6Ebu6XDZ0HcS+W3LSfRduw8vL9gr2oukQAz5Av9plInIv2DhXc/Lio9DBbl9R8hqQfwtyV4JudhnNHikWQ+jU8j3sO08DfQynRIp6Msv9yrOYdP26C27y3Yo98BEZFu2J9CXwwW5ZnZgu+svdAz611sbjQMuaYIXGfZgZkRL2JC2KeoiYJWJQ09VlsJ5g33g4xIyShihlEiKh/YrUpfDBYVwCVE40KX5xA2ch1srW5X643cG7YYiyJH43HLL4hENprXjkUoe3PWrlJ7bgkU0k+j9Stzg/q87hOCEVHw8D9LXwwW5fwfQea7EB0bVgWqNoT5z1OBB+dhk60JKpsu45nwb/FH5Wcx9epj/HcqJQfOXAr6t6APFu5F61fmYNcp4476KS6Z7p0omFhjoS8Gi3JuwejrsePVAZ7DVRtcg9uzx+HJ7MdxUquG6rmnUXf+Y/g+YhyuN2+GCTbU9THciopn/eELSE677PM+W5BqGd6dvweXc2x4fWbxRsQY1fQ1R9Bu7Dw15Dgz24rVB8/DypMCBYhDzPXF1U3LuXCLWV28aTDjZ1s3zMnqjF037AVWTELnnD34MuItHLDVga31Q7j9j0ZIg73Gg2SmTyuiIwpfWXXtofO4++OV6vqhCQN9LududpuenALz/E9b1VaGHC/dexZL95zBgHpm3OLjtVu69wy6N6uBmAh+bFHhGCv0xRqLcqKkNQyXEQn0fBYYsR7/yb0RqVo0GptPoemG17EqcjheD/sP7qzP6nZxIsU+m6k0P6w/fN7nY5bvPVvoc1gL+SYk042fygA2Hr2I8bN3Fquzp3OYcUlcuJStTri+yPTmMg2694Rhxy5k4B8/b8XBQobuBssXyw+q31XQt0fvY5dQIZadzl8oz/64BY/833qM/nZzKR0tGUlB77m/fcUhp2WBwaKceOvOdujbsha+fuiakj1BXAJey30A12Z9hA+iH0dGfDNUMmXh/rCFeOfso1hU8x08nbgLFoTuCn9mx1n8hknLcOfklWqBMwkD7h9CRX3T2XMqvcAPrT9/ugbjN4ep7SdLDuDD3/cVa1bWt+fswmszd/j1t8gU5le+Nh/tX53n8/6fNhxXC7eNmL7Rte9cehaGTV2H/646gj9/Yq+VKU2vztyhfte6wxd83l/QMfjKWs4ZV+dsPxWUY5PXcPS3m/DKL9uC8nxUvhT0fzxvx2kM/jz/ukUUXAwW5URClWh8PqQzujatEdDzZCAKsyJvxOZb5uDe7H9glvVqwGRBUtp6jDjzKpZFjsLjlp9RHZ5LlocC7xOWLHDW4qU5SHp+Fo6ez1D73EPDcz9uwUNek+rc8q/l6tvz/jPp+GrlIeRY84a57jyV5vHYvafTizwm+Zb+78X78Z/lB7HntOfPO204cgEDP1jmMXPgthP2108WY/PlTHpWvpqYq15fgN2O33EmzfP+0pR+OX/NjdQabT1euu9BCY1frz6MExfzr7tz6FwGftp4HF+uPMzROQZUWBeLFfvOsQ9GKWOwMCD5n2mbWAWrbK0wLvpZ4MktQPengZjqSDCdxzPh3+GPyBHY1/lXtDN5LvQVCjUW
vrw9d3e+fd+sPYoFO09jb7LnCf+7dcfQ550lePmX7aq6P1jNHFk5vkPCPZ+sxPYTqbjn07xvWgU9tbMGxvt3f7So6NoTp/OXsvHAf1a7aglKY7Kie93+ltLy3vw9eHHGNgyYtDTffe6BkIynqNjgniXl/2Xx7mScSvHdYZtKOViMHz8enTt3RmxsLGrVqoXbbrsNu3fn/0AmfckHeeXIMGwfNwDLnukNxNcH+rwMPLUDT2U/poaqRppyEbb1G/wa+RJ+jngJt5uXIQLGHvbXY+KiIvsW+Poic/icvTajoBEkxSVNEc41THwxm/OPQLn/89XIKcYwiR/XH1NBQGpghkxZC5NX9PAn5Mhy9cv2nsVIRzPKlmMX0eufizF/x2mURK5VUzUU7iNqZMG4YFm29wwe/modklM9TwzO9WHSfNSYSCdcJ357NZ6iXlP3WippHvnrlLXoMmFhGRxZaPCre/WSJUswfPhwFS5yc3PxwgsvoH///tixYwcqVapUekdJ+cjS6zJk8fXb2hT4mEqRXi9veBRm2LpjRnZ3tDftwy9X74S27Ud0wH50iJiMF7WvMct6DebZOmG1rSVyDDhoqLC+DOrbvo/vOpMW7C3wZ45dKN7y9ulZuaopoqDRJs4aFel0efRCJpJqVFL9CZbvO+vzQ/GC24l5x4lUPP39Zo/mFWdHyJIEiwuX8p5b1lhxnqDl5F3QsRdm4tzd2Jucjjb14jBzRHcE2wP/sa8DE2Y2YfL9V7n2F3ZuYZYIbe7B0vm/wvdE8Ph15pgzZ47H7alTp6qai/Xr16NHjx5BPCwqysg+zTD4mgaoXjky333t61cp8uc3a02B20fB1O81YMOXOLHwI9VMIiuq/gXz1eiSxbYOmG+9Sm2NMmz1913JPvfL6Am5xEX5F6Z2nCx4xI18w5fOga/c0lp1FC1qLgwJFrJomjS/yLDKa5Kq5XvMx0v2Y9IC+xwY7iM9iuJdgyHOpmehhtv7R/pdPPXtJo8w470q7Nztp9A6IQ71q9rfD6dTLyMqzIL4GLd5VrxIqBDbjqcGbUKt2Mgw1fHV+9iKu16E+4nl8PkMNKlZOSjHRuWD++tbULNjVLh96DnzRPAF9JU0JcXe+apatfwfgE5ZWVnq4pSaav9wycnJUZdgcT5XMJ+zvIuLNHv8vbNHXIeZW09hWNeGxSoH9ZjIKkCXUfj7jmtQ9dRyPF1tJeqmb0Nc9jncalmpLlZTGFbktlQ1GQusHXEK1WFUshKqvwora+kcKJd37mrr2rfukO8hrftOp6hQIaQpQi7eJszOP3V5dk5ukcfnq2p49f4zKsxck1QV5y5lY8Q3W7DLqwOqNxnyKRLio3DCrU1672v9URwX0zNxPiM7oDKVCbXu6lgP429v7bFf8tqinafQrWn1fAHO+/ly3MpM+ssU9/jLu1D8HCzJRHYymuqhbo3w937NYHNbZ8j9vBTqZehLccvEpJWwgVFejFtvvRUXL17E8uXLC3zc2LFjMW7cuHz7p02bhpgYY3wLrkhGrbRnyYdbWNGmqns7s/2DWebiktd20faDaHl5A/qb16GJ+aTHc2yxJamaDAkau7XEQroShob3u+R6lG1ZuynRillHLYUe3+QdZuxKMZd6GTjJe+mpVWFBez5fZeur3OtX0jCoiRUJMcCEzRacyrS/N9+5Jhdhbn/+/lTgg+1hBf4+qtj+td2MvanFe793qGbDpvP2x/J9ULiMjAwMGjRIVSrExcUFP1g89thjmD17tgoV9evX96vGIjExEWfPni30wEqSpObPn49+/fohPLzgatlQN/g/a3H8YibmjeqGCPdPWq8y3HMmE7dNtvfc3zMqCQt/+RI1T/yOjqa9ahE0pyO2mphv64T5tquw1tYcVhQ+s6URybfdWVtPYdR3W1AerXz2enR5a0mp/g7vb/zS7HH7xyUf+bFgVBfUr1YZD0xZh/pVojBj00mfv1M6w3Z8Y1G++yLDzMhyG4rbsk4sfh3e
RV3/ffcZPPLfjYUef0XFz0G7+79Yi9UHL+CJno3xr8UHiv1zH93XHr2aVWMZFkDO3zVq1CgyWJToK8UTTzyBmTNnYunSpYWGChEZGaku3uQFK40XrbSe1yi+faSLo2ai4FoGKb9W9aJQKzZStcFH1G2Ftve8gm5v9UANpKC3xV6T0c28DQ3MZzDMPBvDMBsXtMpYY2uBTbam2KQ1wRZbY7Uyq9E98vWmAvtulAelHSqc7xkZnbH7VBqGdUtCVoCjOVMu25CenIG1hy5gbQGPybKZMGGu74617qHCOcfIsv3n0btFbbz0S/4OvDmayVBThfNz0P751iIh3q+fGj59M7o2qY7ulVmGvhS3PPz6T5LKjREjRmDGjBlYvHgxkpKSSnp8pBPp8GYpRsuF1GaseK43LI4OctJZ74FrG+L/Vh3Gd9Ze6hKNy+hh3op+lvXobd6AaqZ0DLCsUxdhgxl7bPXU8NZNWlMVOPZo9dV+fzWtVRn7HJ0Ay5vyHCrKyo3vL8NOR0fW5nViC53foziS07JQu0rhtV9t/FzG/sGp6zBmQHP13N5SMnMMFSxCnVZIp+WirNh/DpvDLBh2V9APK2T49Z8kQ02lb8Qvv/yi5rI4dcreCzs+Ph7R0cb/ZhpqvBc/e+22NuqycOdpDPtyHTIRhbm2zuoiU4V3MO1DR/NedDDvw8CqJ2BOPYYW5qPqci8Wq+e4pEViq9ZYhYyNEjhsTXEaBXf+dZLhl+U1WBBcoUJILcN+x1LzJfXCz9vRsUFVBJsMfaXQSRaFVMwWKj03tPuNlWmwmDx5str27NnTY/+UKVPw17/+NeCDoYqhV/NauP3KeogKN2P6mqNqn/StWK81x8Abb0ejxtVk2AByL57Ae19OV4Ej9txmtMF+VDZdxrWmnbjWnLdcuCz9rmo1bE2xS2uA67tci9dXXPKo2bi1fUKJJ2iisvXBwoLn/SgumadjYYjVBEmN8Jd/HMIVtWNxnZ9T+6/YdxYnUy7jT+1ql9rxVcThpoEs8kcl53dTCJHZbMJ793TAvuQ0V7CQGgVZSO1qt7kXwqokYMyop9X1Tq8vwPn0TDQ1HUd7835cadqHDub9aG46grqm86hrOY8bLY7W9PXA4MgwHNFq46BWBwe0urg55wz+V+kiNl6qgTOQeTr4iUHGsnL/OYz9n73/hz8TkcmEaYM/X62ut6hl76Dqa/K34V9vQM/mNfFAl0YwurwzVck/J75efQR/7dYkSEcUWtioSEEx/6keCPNqOnH3+ZBOePT/1iM3ogW+P5uIWRF9cCnbqvpptDUdVM0nEjiamk6giSUZkchGM9NxNMNx+xPMnIlPZRsFpGtRKnAccoSOg7a6uK1vD4ycl4ZUcAZY8l95+M50tBgTnfny+bK8UQ/uc4s4vwxKYNl07KKqAZJLSASLINRYjJ25C52SaqBNPf86gBKDBQUgsVqMasOMjQovdJSJ6CCLor3QxzV5Ta5NwxX/mK36aazRWmKNtaW0pyij+zTByM4xuP/taUgynURj00kMbWEFzu2H9fwh1ZzS1nQIbXEo7xcs/Te2RAFntDjs1+phny0B+2TruH5K9eNgLQcFRr75y4JVXZrUQHx0+Rgx8O1ae62h8M5HP286jqe+zZvuvaAaj1UHzqFt/XjERRXvb/rvqsOqr5VMoe6cwbIwMlV9YV88CiIr09aNj8o3y6r4acMx1IqNQrdmns1GWblW1yJj8lPSCb3rhN9REjd/uLxE09iHOgYLKrHIMAu2j7tBLZ7l6x+/sKaUiEKCSIMasUCVelhua4vlsM9YOfR++z/3seQL2LZtMwbUScdbX/+mgkeS6TS6VLkApJ1ETVOqurj34RBpWjT2a46w4QodCaq5pSSjVMhYilth8dacXZiy4hBa1Y3DrFHdS63W5P9WHsKVDar6/W3Zu7l67rai+yVJv45XZ+5AizqxmPNk8ZZm+MfP2xzHeRgP92hc5AJ5spaNP88vpq0+ghdmbMVf
ujTEq3/yXBNp7+k0jP7OHpjcT/wyuqfzGwuQ7RhuLJ9L9apEBxwmixOeKA+DBQUkOiJ4/3DfPdIFaw+dVx01C9KwVlU07G3vPPyZNS+cHHp6IHq89iviMg7jjvrpiE0/gLj0A6pPR0PTacSaMtHBtF8tuOY+h1eWFo4DWh17LYeWgP22BBzQElRTS4a0u1BIyMwu3oyLMzYed60RI/N2dG9Ws1SO56VftrtOmhIWxvywRa1YPPZWz2nM89H8O2HK+jBSqyFkOveM7FykZubit60ncXen+j5rMP5wW0tGTuTSqTqhShRaO+aMWH/4vJog7c+dEtXng3OBvMKmi5e/ccKcXWhQLQaDr2mo9o2fbf9y8NXKwypYSFhwTuonHVV9WbQr2RUqEKQ6ygNnLqkOyY/1bIL2iUWvw0QMFlSOSMdP986f/vrxyQHYdPQiereopZYz//MnK9X+cOSikemUChnqYj6htk1MJxBlykFL01G0RF51svtolYO2Ojio1VV9OZyXY1rNkJxh1Mj6vrsUXz14tVr87Z15e1RNwQ1t6uR7nKyt4jT21+1Y+LTnCLmiyAlUTtyFLdrmbffpNPyw/pi6/uLAlmoYuDyPr1pC91yx/0y6WiHX2z/n7laLx8mibd5r47R6OW9ukA1HLuCjQR3z/fwgR0dRsfV4Cv61aJ8rBDlrGcTBs5fyBaHVB86p0PLsDS1UsNl5Mg25NhtyrBo+WWLvK3Jnx/r2GgK3P2bBjtN46Kt1eOP2Nip4uL8OThIoDpzxHJIejFEhj329HofPZaiyZLNI8TBYkO6kVWTJmF759sdGhSHtcm6RVZnhjhm/asZGol8r+3C7aLeqy7rV4rD3fBj2ao5ZYh19OcywoZ7pjOowag8dJ9DYfEL16ahuSnONVrkOnjM15mgWHNFqqZoNqe1QNRyOAHIG8q2NfTkqor98sQb3dErEt+vsIdPXScT7hPbzxuPqZPZUvyuK1Rz48i/b1SRzg65pgHPpWXj7zvawWEy4lJWLHB8LZ0lAvnPyH67bb87aiZvbJWDYl2vx4k0tcXcnWasnz4u/bEdutgU/nduAQ+d8dwZ1BoGieA/vlr4LEgTcyUyr7sbP2ukxBNbbPZ+uck25/tky35OotXhpDj6+/yqPkCQnd/HijG0qWPyw/qjHir/yOshr473asK8A4i8JFeQfBgvS3f3XNlQdQb39+Nh1+GjRPozq06zQn3+sZ9NC71/49PVo9uLsfPulb8VRrba6LMKVHvfFIx1JplNobDqBJPMp1ZejiemkqvmINmWr602Qf/2KTC0CZ7V4nEW82kpn0nOO664L4tQ2RY1gYQgpT5yhQnR763csf7a3x/3uXYMkSDz57SZ1XeaduLZxdSSnXsYbs3aquV4qRYahb8tayLbaVH8kIaFCyDd7UTtut+oIKZlCgrQ391AhpH/HrK0ncTEjRzWPeAeL85dk9UkTluzxvYKuP6QGQDpdTv3jEK5rUgNvz92FxbvPeDzGWshwmrPpWQWuMlpQqHB69L/rUamQZtafN50odMVfF/576YLBgnQza2R39SH5aE/fY8VloqD37/U84fsiNRX5frZOZVSNCVe9xr1nEC2OFFS2T0OuNZUE4mKCDXVwQdVs2IOHfdSKBI/6pjMqdCSaziARnh/AvmRrFhU6zmlxrjByXotVHU1ljZU0RCNdi0Y6onFJi0IaYtTtS4hS+9jptHQdu5CJHSdS0SohDqO+2Yhf3E5m3uSb+ojezVR1vXA+tlqlCJy/lI3Ff++JRjXyD4WWJgnnuVdq54rD+1xdmiNlm/oI5O6kj4a7tKxcj0nOOrw6r8S/W4ajO7nnlyN+1CCU51yhaZp96vq4kvflmrriIOpVjXHV1JYXDBakG/nAlktJTR7cEUv3nsW9nT2/tQn5hrjmxb6utU6CRYMZJ1EdJ23VscIxYsUpAjmobTqPmkhBDVMKqptS1aJtct1+ybsdb8pAhMmKujivmlxKQqZH
T3cLH7KV8JGsVcFprSqS4dhqVXFaq4ILiC3nH7Xlz74z6agdF+kzVLhPMb/5WIorVLiTUCH+s/ygmg7f29zt/s8m634y//PHK1VfhvLKuw9HMPSYmH8124JI341gktdc1i0Khnfn78GHv+/DSze3Ugv3+Wvb8ZQSTahWFhgsqMK6sW1ddSlISWoqApGNcHvTCmoX+TVSQkh1pKK6I3TUlC1SUdWUhsrIRGVTJirhstrGym1koJLpMmJhDySikikLlZCF2qaLxTq+LC1MzVrqDB55oSMvhMiFTTR5rDZbgSMQ/CFNIK/c0grBtuZQyUJpafhqpdu8MkEm896UhPQLCaa+7y5RJ3HpeCqjRVrWjS2wb41WQAdbJwkV4rWZOwoNFtIcpRaP9BqifyoI78vSwmBBFCRP922KdxbsK3YIUTUfWnW/67IllEj4qOQKHZmO0JGJeFM6apkuohYuorbpgrrUMl1QnVEjTbmoj7Oobyq8/f2yFo5TWjWcRlU1MkZd1+R6dbWV2xJEQmFkTFGTS/nj+on2hfiMSjqmljclWd20OGTE2ZZjKer6f4dd45qkyxkm1hw8rx7zyPWN8fyNLVUtk3S4ve/qBripbV21HEJRDp+7hMwcK4ZNXYeYCAvmPdXDI6j4+tiQPmkycZgM3a9eOX8TcVlhsKCQ9dfrGuH6K2qiV4ta6naj534L6PlkDH5ZkFByXi5aXjPSa7e2Rv1qMRg6xbHeCoAalSNVBzrnkNuajrChgocjdNSG5+1qpnQ1BLeR6TQaoeBqeptmUiNgnKFDts7Aka2FQYNJXWxuW7jdlg9F6SOiOZqXnLfl/hStMpK1eJxHnKHCy/GLmXofQshxnoffubu9mk9DvvQP7ZqkmqZKSkKDM1SI+/+zWtViyLwbMjpl5ojurqHuMoS2Wa1YNbeH/MyWY1vVyV/67xRGRgl5B1GptZERcEv3nFEje6RjsNMT0zYgzGxydWqV2pAi5zwpRQwWFJJev62NGo0STJHhnk0vVzWsqoYL+mPzK/3Rfpx/Hd4m3tUu3+gA8X/DrsaN7y9T13MQhhOogRNaDbw0sBUmL96Hb/52rZq/wWnMgOZ4f+42FTKk70cd03kVNqQPiPQdqWO6YN+HCwg3WVFbBZXiNcOUhFUzqXBxRquCMzLCBo6tj9upkFDH5hvy5HxH3HlVfXWRZgUZhlt4sNAKfS85Q4M7mSjMOQ/HP362z+Ph9PfvNyPObcSPr1AhoVNGBcmEZDKk+NOlB/IflQZsPZaihkU7A47TzC2eI9RkJA+DBVEZ2v36Da7hf0WRGo0le/JGeMg//ws3tcTzP+V9eMhUxfKB0KNZDURZNFy2mtTEQgPb1cX0NUc8Huur1kQ+BJxKsv5EQS0pLevG4YWbWuDNWZ7D8aQ998GujfK1/w7t2ggT5+7GMa0WjqFWgU8s9Q7VkYY6pnOusGG/XEAtXIDFUS9hNjnrLZwX+9wh9rqLvP1mt/stsKKqKR3VkQKLSVMdYaX/SVGk+UYFDtcwX6/rKojYr1+GflXEVLa838KyXol0vpTQ/cB/7CdoUQ2p6GPZgP7m9ehm3orLiMBhx+rKamuTrcxVU0eNGPPm/mVgro8OuUV1YnWuZfLePe19hgox+PNVWHvIvy8qemGwoJAwc0Q3taCQLO9e3FAhvJc0uaF1HdVOKhdZN0ImR5o8+Cq1/klOTg5eutKKpPZd0KWpfarnu6+qX2iwGNC6juqE5WuGRJna/J0/t/c5B4eHQvpoPNy9sUewaFPP3nzic9bGYvb1kCggQ2NTzVWxzWofi/unDgn4ZyHDMZ36tqyNBTs9P3jH39E2XxlJAKmGNNQ0XbRfkKKabFy3pcMr7Ns4U4ZqvinuMF9ZHddZ42EPHvbAIX+TXE/XYpCFcGQjTG3VdS3vtjRFGamJxshsBbypZSr2RNNp9DevQ3/LenQy7VZB1ika2SrgqiUAvFzUKqmVlQ9pte1bFTrsIeSiCh2mUunT
U1FChWCwoJAgUzSXZEhWw+oyQiLvZNWnZd54cZmW2FvlcKBzo6quE7d8Q5IakkW7zuDd+bux57TnlMOFiYsOK3Bky4jeTV29yt17iw9oXVt9Yxp8TQOfAeKKWjLkNE/7+vFqqGRBH8LTHr4Gf/9us1r5cnivprj1XysQCOcsqe4kpDmDxSPdk/DJsoOqv4VzkrGdmmeTVZ8WtZBQJdo12VQksl1BQ8KHfZSNPYy4X5etBBBZHVcuSYX0ISlOM00WIlTYkKCR5QoeEch1zC9ir4Nxz33O2/Y+Jp6PyXushBgZGixzmlxAZVyQrVZZ7VPX1bayms+EzT+F83hLy42Tm4Fdv6nLskjPzqZbbY0w39oJC20dYYUZDU0yMd4ptdaQTJInW2kWrGK6lLfukJcULUYFjNNaNZzTYlVTnvSFOud43WTOGrl9HrHqvWJUDBZEPnRrWgPL953FE72b4pqkauoEHxcdrkKDv6SGRNadkJO+rK3gfnKW875WQJWDs0f7L8O74sWft2J/sr2XuHi6f3PVmWvl/nOqycVp0j1XYtXBc+jSuLrvg/E6D0mHT2ew8FWLITMu/vG8fbl7d58+cBWemLbRY+ltX/4xsKXqROqcoVJmpJy9LX/tjKwRI23Gd3ZMUMFCfPHXTqgbH+3qJ+IktU7/uLmVWnNC5omQD2hZv+UYanrU3kjTzosr3IdAamoEjX1oryNwOIf7Iu+6PEbiQaQpR43AUdeR7fGNVq5LvYZcfJVrWZDhw86QYQ8c9q3MZZIDC3K1MOTCYr+utvbb6romjU559+UizPEzst+smrMsJnt3WovXRfaFqUfZ3B6nufaJNC1G9XtJ0SohFZUc2xjVxFCmhWXLAQ4sdoSJWUCqfc0VkauZsdrWEvNsnbDA2hHH5f3jZpdmD+fuonEZDUzJ9s7NKnRI+DiNhuZTSDCdV/PTdDBJU4bv5gzvmjN5vc6pEClhwxFAHCHEWWapWowaAp6qVVKvbUWYGI/BgshBOlhJW6gM7ZI22Kxcm1oMqbC5MvwhJ+529T1XR6xfNRpVon1/c3EGDllRUXqaS5PJteMXFlpjIqtJysm7wGPw+lC/vUM9/LblpGp3ltUzJZCsPHCuwJ9f94++aqRJizqeE5sVtKz0Q90bu46rVmxkvllSpWlEfPPwtWrWxpgw4JWOuejW43o0q1MFJ3yMpEiqmX8GS19euaW1mgI7jwnpavbSGBxEXb+H+copV4JGpDNsmLIdocNzn7Mfif03al5b3/vsNLdq+DT7BemubTVTGqqY0lUTkYQeGT4ss8BK35aKQsKQ/WRpDxx5J80YVwCRk6fUAEkQylKBJ+8iI46y3W+rffJYe3CSiwSc7uat6G9ZhwGztgA5buuHhMcATfsCLQbiqukm1V9C/t8zrMWb7yITUditNVAXb1HIcoSOU6rmTF6naiaZqyYVVZGmhnzLbdkvnZ+dNWfFab5zl6pFO8rOHjxUAHErP+c+ZPUAIj1rKMsKgwWRwzd/64J/ztuNp/vbF5Qq6GQZLHKSrV81BmNuaI7D5y/hHh8ziLqrEx+lVuCUmpOSGtnHc12VPi1rYfao7mhY3T5UVhZ/au+YhnnDS/3y/bzUPsjFe52IRo6fL6wvidMzNzTHpAV78f49HdDfsV/6qEjHVemnUi1Sns8eHtzP/VOGdsbmoxfVQmHq2FvUwvfrj6ljL2ihqPlP9UC/9/JGvjiHBR857//CUvINP1NdHFMw+wompTm/ttsviUaWOkGpoOEeQExpaj4TqZsIc9QshJlyVa8QuW7fyn1WdXKTsOS8z36/NOTY1Ldiq+tir8Xw2KeZXfukxiPvPnt0lVqfeNMlxOGSY5uBMJNNhaGaSEVNk+diYaVGlk6JqQE0vxFocTPQ+Hog3L6oYedN61R/H2nikxqzuz/OP9rDH9IpeI+WqC6F01R5yGslk+TJ66cCCOyvnwQRWatI+g7F45JrG2Oy147FmTIRh0ygiPlokDWK
wYJIbzK9+Bd/7Vzqv6dG5QicTc/Gq47hYHKillDjJEt3L9t7Vq3i6K3HFZ7VtSXrM5JHApSMHnGS5bynDu2MCItZrXNRGPfmjwe7JWFvcnqh62k4Pd6zqboUh/siVtIk5V4bI8Pp2iVWQf9WtfHXKWux02tlS9GsdizuuzoR09ccxVt3tlWTE42cvrFEwaL8MKlwc1wummfzT/mlqZlk5YQaZ7rkccJUtx3hQ7ZqdllIGMpFuCnX0RxlDz2e++yByb7Ps8bhkK025to6oUXPe3F974GAOf+XhH8NulKthtq+fhXVT0n6QuWq5dv34fD+PfjlcGl9sTC5ahwOo06xXz/5O6VsnIHNO3jEuZWh3N8jSlZa1geDBVEZWzD6etWJs6D+Gl8OvRqpl3NQJUafzl09C2lKKUhMRJhaME5mGZS1HGRlTGfNQiCkJsN13asPiKwe+oBjLpL/PdEVV74632MRLKc3b2+LJ3o3Q70q0QFND02BMKmF9eSiZpsVQXwZZAi0M4xIPYqcuGXve9Xb+wwVQmokOzao6tEXKjIMGNGrCWZl7sYv9r7B5UYOwlydQYtTfjsRCfs7vuwxWBCVMQkMUvVa2MlUr1DhDxl6K8NkZSIw9+aa3a/doBbGcjZnBCIhPgoD29ZVk48V1jQlo28a16zk6ojqXSvjDBVCmpykRujKBlWw8UjpTfBFZUeGQEsnXu+RFsUdQm1EaZdzVN8mPZT/7qVEFPAIF3Fnx/pBfd63726HN25vg8/+0infibxxzcoetQ0lJc/10eCOePfPHYp8rNSYSLgQz92Yv2OrkwSVOU92x/SHrw34+Mg+Y2t5FcrBwhyE/7+SYo0FkcFNvr+jmj20T4u8OTiCQWYb9dUPRC+NalTC70/3LFZY8R7VYiQt4m3YlVI23xmXjumFBtVj1IytepO5W75efcRjXwjnClgKWVm1tLHGgsjgYqPCcXO7BN2qRUPdB/ddWaa/r2ON0judvvvn9h63JVT4MvfJHihrI/s0K/bMm6HAzGBBRFT2Fowu/ROgjFq5rkkBE5aVgiqluBTKtY2rq9lai9K8Tiwiwgo+vchKnIUZ3quJ38fm8xlDN1fArOPZncGCiEJW01qxaqG50iSdTmW2UPdJxkpThFnD+hd6YdbI7gU+RhbOKwmZSr2oc3ViNUdH2UIeKBOyFbZvzIAWuLpR/g7OT/bNXyshejWvicpuK4g6FTSrbXEMvjrwUU16cp/qv6wxWBBRSPt8iGfn0+Iub+8PWcVW3NimjmuCMadt4wbgu0e6eIxc8Sa1BL+N7Fas31UvRtaZCVfzsqx+If907P8e3BGfPpD3N1eNCVfDduUYP/fqiOscHt2vVW3874ni/f5pD11b6Eldnst7+niZmM37NOj98zIr7JN9r1Adb5/qe4Vr//v3dlDNTTLkWSZEc9e/Vd7EbP565eYWaur6isrMphAiIn3IOjBdm3o2VTRxmza8b8ta+RZQk1lCZVXW4pKJuraPG6BO6u7ks1+mUi9s+HHz2rH46fGuaJ0Qr0a7PNg1qcA+DNte7gP3rjS146I8nlsmGZNwI30jfnzsOnX7vw9dg9dua6MmHOvbqrbPmgQZ+SML0Ym/97ePAnHOIeJu0j0dkFjN3u+id4tarhoMmbpehiV/NKgjJnuVgQQrWUvH+zzo3cFWZol17m9ZN6/G5U8d6ql+RM5ydpLfU7WISd4KI+HHVy1IYVPqF6aPozzKio65gqNCiIjcyYyj7t+V372nA+ZsPaX2P/TVOtf+u66qn2+5d2cV9MS72mH0d5vzTejlHkxSMnPQIbGKzxPBwfE34VTqZdSJi1JDJp1DBx+93t73YM/ptHy/96WbWyHSx1wfEmY6vb7Atcqvs7ZATvTfPpI342txyeyvUmMja+s4SdDZeOQCbm2f4Nr39p3t0bnRUbVPamnk1/pa6E6ClZh4V3vc99kqV3CRqd9jIi34ZIl9QS/3fphS6yEn9nY++nu0qhuH
nadS0a2ZfZh1ICpFFHyKlNf/rTm71HU5ji1ec6h8/pdOrveLhCf5O9+dtxvHLmTip43HUdpYY0FEpCP3k9Yfz/X26B8gw2r/3DlRTf7lztfH9swR3VTNhPMbdEFmPH6danpwr8F4rKc9NNzUVr69m9TKrrL1NR+Br+nWh3VL8vm7vJtegkGCkXtIkM6a917dwONYZXp4WYSuVlyU2u8rVLiTxfa2jh2gpocXUoYyk6tTmFuNkTyXlFdXxxwt7v43oht2vnpDka9BcUhgKGgGWfcF9WJ91Gy0cKtVcR756P7N8fcymPfj8Z5NVE2cXhgsiCjkuQcL6Wzpq3dA64Q43H1VfdewRqmZkBoFWR3T/Vui/HzP5jVVE0RBJ3uZQEyaHiQ8OA26uoHqz/DBvUUPT5Ww8EkFbv/3p8Oh9KeQPiZFLdLn/hzBWkBQAsxbd7XLt1/6dbiT133Fc73xRK+m6vfLqr3u7yn3TCUdYIujoD43rs6xhXjGz2aaYGOwIKKQN+S6hq4F4IT0CfB1kpl4d3uM7neF6/ayZ3th48v98p0M5NuidMiU5onikueT/gwyPXlxyIqx91+bf/luX6rEhHssU6+3R69vrLYDWhd9PKP6NsMvT3RTnTP1ssatE6yERWeTz8s3t0Kt2EgVEuW1l9qIna/egM/+4hn67Gu+5nnxJntNzMPd84Lnor97Tu72/aNdcGjCwHzHIqvzlnfsY0FEIe+GNnWx+O89Uc8RKN6+qx1en7kTQxyjOQrirG6Wb6tZOVZV/V+WpJ+BfQK0uoU+bsmYXjh6PkP1sSiKrAL73vy96NKkuuooWhqkw6UsAFbcb+96k+Yc6cAq/UikD4izWUeabYZ2beTRzBPhmL8j0m0eD+9WoId7NMYDXRqqFYI/W3ZQhRIZkizb06mXseqFPq4mLGle+2jRPlUDdvR8Jjo2rIIV+86p+/7e/wr8c94eVXP25h1t8ODUvD5AemKwICJyTAnuJE0UskZJcRU2VLQ0SagozugE6RMRX4xQIe7p3EBdSptz9EhFIR1G5eKtoL4jteKiMKJ3U9Us46tpxrlvx6sDXAF18ZieKmy4P17C4OT782pA5mw75bo+vFdTXNe0hqrpio0Mw6g+zYoVHksbgwUREVEpeNoxwqUw7k08EjD86R4iocZ96fenHM10emMfCyIiogqiQQWo6WGNBRERUQXRKiEOH953Zbnun8JgQUREVIHc4jYRWXnEphAiIiIKGgYLIiIiChoGCyIiIgoaBgsiIiIKGgYLIiIiChoGCyIiIgoaBgsiIiIKGgYLIiIiChoGCyIiIgoaBgsiIiIKGgYLIiIiChoGCyIiIgoaBgsiIiKquKubapqmtqmpqUF93pycHGRkZKjnDQ8PD+pzhwqWYWBYfoFjGQaG5Rc4lmHBnOdt53m83ASLtLQ0tU1MTCzrX01ERERBOI/Hx8cXeL9JKyp6BJnNZsOJEycQGxsLk8kU1CQlYeXo0aOIi4sL2vOGEpZhYFh+gWMZBoblFziWYcEkLkioSEhIgNlsLj81FnIw9evXL7XnlzcC3wyBYRkGhuUXOJZhYFh+gWMZ+lZYTYUTO28SERFR0DBYEBERUdAYJlhERkbilVdeUVsqGZZhYFh+gWMZBoblFziWYeDKvPMmERERGZdhaiyIiIhIfwwWREREFDQMFkRERBQ0DBZEREQUNIYJFh999BEaNWqEqKgoXHPNNVizZg1C0dKlS3HLLbeomdFkZtOff/7Z437pq/vyyy+jbt26iI6ORt++fbF3716Px5w/fx6DBw9Wk8NUqVIFw4YNQ3p6usdjtmzZgu7du6vyllnq3n77bRjB+PHj0blzZzUzbK1atXDbbbdh9+7dHo+5fPkyhg8fjurVq6Ny5cq48847cfr0aY/HHDlyBAMHDkRMTIx6njFjxiA3N9fjMYsXL0bHjh1V7/OmTZti6tSpqOgmT56Mdu3auSYX6tKlC2bPnu26n2XnnwkTJqj/
4yeffNK1j2VYuLFjx6oyc7+0aNHCdT/LrwxoBvDNN99oERER2hdffKFt375de/jhh7UqVapop0+f1kLNrFmztBdffFH76aefZLSPNmPGDI/7J0yYoMXHx2s///yztnnzZu3WW2/VkpKStMzMTNdjbrjhBq19+/baqlWrtGXLlmlNmzbV7rvvPtf9KSkpWu3atbXBgwdr27Zt06ZPn65FR0drn3zyiVbRDRgwQJsyZYr6uzZt2qTddNNNWoMGDbT09HTXYx599FEtMTFRW7hwobZu3Trt2muv1a677jrX/bm5uVqbNm20vn37ahs3blSvSY0aNbTnn3/e9ZgDBw5oMTEx2ujRo7UdO3ZoH374oWaxWLQ5c+ZoFdmvv/6q/fbbb9qePXu03bt3ay+88IIWHh6uylOw7IpvzZo1WqNGjbR27dppo0aNcu1nGRbulVde0Vq3bq2dPHnSdTlz5ozrfpZf6TNEsLj66qu14cOHu25brVYtISFBGz9+vBbKvIOFzWbT6tSpo02cONG17+LFi1pkZKQKB0L+SeTn1q5d63rM7NmzNZPJpB0/flzd/ve//61VrVpVy8rKcj3m2Wef1Zo3b64ZTXJysiqPJUuWuMpLTpTff/+96zE7d+5Uj1m5cqW6LR9EZrNZO3XqlOsxkydP1uLi4lxl9swzz6gPP3f33HOPCjZGI++Vzz//nGXnh7S0NK1Zs2ba/Pnzteuvv94VLFiGxQsW8sXIF5Zf2ajwTSHZ2dlYv369qtJ3X49Ebq9cuVLXYytvDh48iFOnTnmUlcz7Lk1HzrKSrTR/dOrUyfUYebyU6erVq12P6dGjByIiIlyPGTBggGoyuHDhAowkJSVFbatVq6a28l6TZZXdy1CqWRs0aOBRhm3btkXt2rU9ykcWN9q+fbvrMe7P4XyMkd6zVqsV33zzDS5duqSaRFh2xSdV9VIV7/13sgyLR5p3pTm4cePGqllXmjYEy69sVPhgcfbsWfUB5v4mEHJbTqKUx1kehZWVbKVN0V1YWJg6sbo/xtdzuP8OI5CVeKVtu2vXrmjTpo3r75NAJeGrsDIsqnwKeox8eGVmZqIi27p1q2q7lrbnRx99FDNmzECrVq1YdsUkYWzDhg2qv483lmHR5IuS9HeYM2eO6vMjX6ikP5isysnyKxtlvropUUX61rht2zYsX75c70OpUJo3b45Nmzap2p4ffvgBQ4YMwZIlS/Q+rApBluoeNWoU5s+frzpGk/9uvPFG13XpSCxBo2HDhvjuu+9Uh3UqfRW+xqJGjRqwWCz5evXK7Tp16uh2XOWRszwKKyvZJicne9wvvaFlpIj7Y3w9h/vvqOieeOIJzJw5E4sWLUL9+vVd++Xvk+a3ixcvFlqGRZVPQY+RkRQV/cNPvhFKL/mrrrpKfetu37493n//fZZdMUhVvfz/yWgDqSmUi4SyDz74QF2Xb8UsQ/9I7cQVV1yBffv28T1YRip8sJAPMfkAW7hwoUcVttyWdl3Kk5SUpP4h3MtKqu6k74SzrGQr/3TyAef0+++/qzKV5O98jAxrlbZKJ/mGJd9Uq1atiopM+rxKqJDqe/m7pczcyXstPDzcowylb4m04bqXoTQHuAc0KR/50JEmAedj3J/D+RgjvmflvZOVlcWyK4Y+ffqov19qfJwX6e8k/QSc11mG/pGh8vv371dD7PkeLCOaQYabysiGqVOnqlENf/vb39RwU/devaFCepPLECm5yMv77rvvquuHDx92DTeVsvnll1+0LVu2aH/60598Dje98sortdWrV2vLly9XvdPdh5tKz2oZbvrAAw+oYYRS/jL0ygjDTR977DE1HHfx4sUew9UyMjI8hqvJENTff/9dDVfr0qWLungPV+vfv78asipD0GrWrOlzuNqYMWNUr/SPPvrIEMPVnnvuOTWC5uDBg+r9JbdlRNG8efPU/Sw7/7mPChEsw8I9/fTT6v9X3oMrVqxQw0ZluKiM8BIsv9JniGAhZByx
vFlkPgsZfipzMISiRYsWqUDhfRkyZIhryOlLL72kgoGEsT59+qj5BtydO3dOBYnKlSurIVZDhw5VgcWdzIHRrVs39Rz16tVTgcUIfJWdXGRuCycJYY8//rgaRikfLrfffrsKH+4OHTqk3XjjjWp+D/lQkw+7nJycfK9Vhw4d1Hu2cePGHr+jonrwwQe1hg0bqr9JPozl/eUMFYJlF3iwYBkWToZ91q1bV/1d8tkkt/ft2+e6n+VX+rhsOhEREQVNhe9jQUREROUHgwUREREFDYMFERERBQ2DBREREQUNgwUREREFDYMFERERBQ2DBREREQUNgwUREREFDYMFERERBQ2DBREREQUNgwUREREFDYMFERERIVj+H9CmD5GnJiLpAAAAAElFTkSuQmCC"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "execution_count": 47
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UOSROA66Jzbg"
   },
   "source": [
    "## 推理\n",
    "\n",
    "- 翻译项目的评估指标一般是BLEU4，感兴趣的同学自行了解并实现\n",
    "- 接下来进行翻译推理，并作出注意力的热度图"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 875
    },
    "id": "cX75BqcBJzbg",
    "outputId": "e9dcbb2c-5188-4f2e-a1ac-793fe5f40b19",
    "ExecuteTime": {
     "end_time": "2025-03-19T06:39:33.389708Z",
     "start_time": "2025-03-19T06:39:33.098104Z"
    }
   },
   "source": [
    "# Load the best checkpoint for inference (deployment-style usage).\n",
    "model = Sequence2Sequence(len(src_word2idx), len(trg_word2idx))\n",
    "model.load_state_dict(torch.load(\"./best.ckpt\", map_location=\"cpu\"))\n",
    "\n",
    "class Translator:\n",
    "    \"\"\"Wrap a trained seq2seq model for sentence-level translation.\n",
    "\n",
    "    Each call translates one sentence and also renders an attention\n",
    "    heat map between source and target tokens.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, model, src_tokenizer, trg_tokenizer):\n",
    "        self.model = model\n",
    "        self.model.eval()  # inference mode: disables dropout etc.\n",
    "        self.src_tokenizer = src_tokenizer\n",
    "        self.trg_tokenizer = trg_tokenizer\n",
    "\n",
    "    def draw_attention_map(self, scores, src_words_list, trg_words_list):\n",
    "        \"\"\"Plot the attention score matrix as an annotated heat map.\n",
    "\n",
    "        Args:\n",
    "            scores (numpy.ndarray): shape = [source sequence length, target sequence length]\n",
    "            src_words_list (list[str]): source tokens (shown on the x axis)\n",
    "            trg_words_list (list[str]): target tokens (shown on the y axis)\n",
    "        \"\"\"\n",
    "        # Transpose so that rows = target tokens and columns = source tokens.\n",
    "        plt.matshow(scores.T, cmap='viridis')\n",
    "        ax = plt.gca()\n",
    "\n",
    "        # Annotate every cell with its score. The displayed matrix is\n",
    "        # scores.T, so the cell at (x=s, y=t) holds scores[s, t].\n",
    "        # (The original wrote ax.text(j, i, scores[i, j]), which is\n",
    "        # transposed and misplaced whenever the matrix is not square.)\n",
    "        for s in range(scores.shape[0]):      # source position -> x\n",
    "            for t in range(scores.shape[1]):  # target position -> y\n",
    "                ax.text(s, t, f'{scores[s, t]:.2f}',\n",
    "                        ha='center', va='center', color='k')\n",
    "\n",
    "        plt.xticks(range(scores.shape[0]), src_words_list)\n",
    "        plt.yticks(range(scores.shape[1]), trg_words_list)\n",
    "        plt.show()\n",
    "\n",
    "    def __call__(self, sentence):\n",
    "        sentence = preprocess_sentence(sentence)  # punctuation normalization etc.\n",
    "        encoder_input, attn_mask = self.src_tokenizer.encode(\n",
    "            [sentence.split()],\n",
    "            padding_first=True,\n",
    "            add_bos=True,\n",
    "            add_eos=True,\n",
    "            return_mask=True,\n",
    "            )  # encode input ids and return the encoder padding mask\n",
    "        encoder_input = torch.tensor(encoder_input, dtype=torch.int64)\n",
    "\n",
    "        # Use the wrapped model: the original called the global `model`,\n",
    "        # silently ignoring the instance passed to __init__.\n",
    "        preds, scores = self.model.infer(encoder_input=encoder_input, attn_mask=attn_mask)\n",
    "\n",
    "        # Convert predicted ids back to target-language tokens.\n",
    "        trg_sentence = self.trg_tokenizer.decode([preds], split=True, remove_eos=False)[0]\n",
    "\n",
    "        src_decoded = self.src_tokenizer.decode(\n",
    "            encoder_input.tolist(),\n",
    "            split=True,\n",
    "            remove_bos=False,\n",
    "            remove_eos=False\n",
    "            )[0]  # decode the input ids back to tokens, for plotting only\n",
    "\n",
    "        self.draw_attention_map(\n",
    "            scores.squeeze(0).detach().numpy(),  # detach in case grads are enabled\n",
    "            src_decoded,   # source tokens for the attention map\n",
    "            trg_sentence   # target tokens for the attention map\n",
    "            )\n",
    "        # Drop the trailing <eos> token from the returned translation.\n",
    "        return \" \".join(trg_sentence[:-1])\n"
   ],
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: './best.ckpt'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mFileNotFoundError\u001B[0m                         Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[48], line 3\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;66;03m# load checkpoints,如何上线\u001B[39;00m\n\u001B[0;32m      2\u001B[0m model \u001B[38;5;241m=\u001B[39m Sequence2Sequence(\u001B[38;5;28mlen\u001B[39m(src_word2idx), \u001B[38;5;28mlen\u001B[39m(trg_word2idx))\n\u001B[1;32m----> 3\u001B[0m model\u001B[38;5;241m.\u001B[39mload_state_dict(\u001B[43mtorch\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43mf\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m./best.ckpt\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmap_location\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mcpu\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m)\n\u001B[0;32m      5\u001B[0m \u001B[38;5;28;01mclass\u001B[39;00m \u001B[38;5;21;01mTranslator\u001B[39;00m:\n\u001B[0;32m      6\u001B[0m     \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__init__\u001B[39m(\u001B[38;5;28mself\u001B[39m, model, src_tokenizer, trg_tokenizer):\n",
      "File \u001B[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\torch\\serialization.py:1425\u001B[0m, in \u001B[0;36mload\u001B[1;34m(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)\u001B[0m\n\u001B[0;32m   1422\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mencoding\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m pickle_load_args\u001B[38;5;241m.\u001B[39mkeys():\n\u001B[0;32m   1423\u001B[0m     pickle_load_args[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mencoding\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mutf-8\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m-> 1425\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[43m_open_file_like\u001B[49m\u001B[43m(\u001B[49m\u001B[43mf\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mrb\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mas\u001B[39;00m opened_file:\n\u001B[0;32m   1426\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m _is_zipfile(opened_file):\n\u001B[0;32m   1427\u001B[0m         \u001B[38;5;66;03m# The zipfile reader is going to advance the current file position.\u001B[39;00m\n\u001B[0;32m   1428\u001B[0m         \u001B[38;5;66;03m# If we want to actually tail call to torch.jit.load, we need to\u001B[39;00m\n\u001B[0;32m   1429\u001B[0m         \u001B[38;5;66;03m# reset back to the original position.\u001B[39;00m\n\u001B[0;32m   1430\u001B[0m         orig_position \u001B[38;5;241m=\u001B[39m opened_file\u001B[38;5;241m.\u001B[39mtell()\n",
      "File \u001B[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\torch\\serialization.py:751\u001B[0m, in \u001B[0;36m_open_file_like\u001B[1;34m(name_or_buffer, mode)\u001B[0m\n\u001B[0;32m    749\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m_open_file_like\u001B[39m(name_or_buffer, mode):\n\u001B[0;32m    750\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m _is_path(name_or_buffer):\n\u001B[1;32m--> 751\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43m_open_file\u001B[49m\u001B[43m(\u001B[49m\u001B[43mname_or_buffer\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    752\u001B[0m     \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m    753\u001B[0m         \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mw\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m mode:\n",
      "File \u001B[1;32m~\\AppData\\Roaming\\Python\\Python312\\site-packages\\torch\\serialization.py:732\u001B[0m, in \u001B[0;36m_open_file.__init__\u001B[1;34m(self, name, mode)\u001B[0m\n\u001B[0;32m    731\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__init__\u001B[39m(\u001B[38;5;28mself\u001B[39m, name, mode):\n\u001B[1;32m--> 732\u001B[0m     \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__init__\u001B[39m(\u001B[38;5;28;43mopen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mname\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m)\u001B[49m)\n",
      "\u001B[1;31mFileNotFoundError\u001B[0m: [Errno 2] No such file or directory: './best.ckpt'"
     ]
    }
   ],
   "execution_count": 48
  },
  {
   "cell_type": "code",
   "source": [
    "# Build the CPU-side inference wrapper, then translate a sample Spanish sentence.\n",
    "translator = Translator(model.cpu(), src_tokenizer, trg_tokenizer)\n",
    "translator(u'hace mucho frio aqui .')"
   ],
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "# Translate another sample sentence (also draws its attention map).\n",
    "translator(u'esta es mi vida.')"
   ],
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "python3",
   "language": "python",
   "display_name": "Python 3 (ipykernel)"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "colab": {
   "provenance": [],
   "gpuType": "V100"
  },
  "accelerator": "GPU",
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "267a9f8d838c4649b208914938b1bcff": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "HBoxModel",
     "model_module_version": "1.5.0",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_a9552c62dcb4441fbca47f89e939759d",
       "IPY_MODEL_7dc472c2f73f4443a0e8437074e67476",
       "IPY_MODEL_c46cefacefb54be7ad60ca93855de3ca"
      ],
      "layout": "IPY_MODEL_9ebbd2d91c9849baaa85cff5b2b048e1"
     }
    },
    "a9552c62dcb4441fbca47f89e939759d": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "HTMLModel",
     "model_module_version": "1.5.0",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_63c6ee4650cb4a938e9f325113e259d1",
      "placeholder": "​",
      "style": "IPY_MODEL_95d9e57c587f43e6ba5f656f2b2e5c71",
      "value": " 23%"
     }
    },
    "7dc472c2f73f4443a0e8437074e67476": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "FloatProgressModel",
     "model_module_version": "1.5.0",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "danger",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_863e7ad581894502979884fe0e9e412e",
      "max": 33420,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_7d21bf5788794ed2b0b4dc4a98a5d2e9",
      "value": 7599
     }
    },
    "c46cefacefb54be7ad60ca93855de3ca": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "HTMLModel",
     "model_module_version": "1.5.0",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_296304e90e43440094ba467cafb20896",
      "placeholder": "​",
      "style": "IPY_MODEL_c3d8fddae7354c278c8e88291dbcee8d",
      "value": " 7599/33420 [09:31&lt;23:51, 18.03it/s, epoch=3, loss=1.2, val_loss=1.26]"
     }
    },
    "9ebbd2d91c9849baaa85cff5b2b048e1": {
     "model_module": "@jupyter-widgets/base",
     "model_name": "LayoutModel",
     "model_module_version": "1.2.0",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "63c6ee4650cb4a938e9f325113e259d1": {
     "model_module": "@jupyter-widgets/base",
     "model_name": "LayoutModel",
     "model_module_version": "1.2.0",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "95d9e57c587f43e6ba5f656f2b2e5c71": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "DescriptionStyleModel",
     "model_module_version": "1.5.0",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "863e7ad581894502979884fe0e9e412e": {
     "model_module": "@jupyter-widgets/base",
     "model_name": "LayoutModel",
     "model_module_version": "1.2.0",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "7d21bf5788794ed2b0b4dc4a98a5d2e9": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "ProgressStyleModel",
     "model_module_version": "1.5.0",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "296304e90e43440094ba467cafb20896": {
     "model_module": "@jupyter-widgets/base",
     "model_name": "LayoutModel",
     "model_module_version": "1.2.0",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c3d8fddae7354c278c8e88291dbcee8d": {
     "model_module": "@jupyter-widgets/controls",
     "model_name": "DescriptionStyleModel",
     "model_module_version": "1.5.0",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
